aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlntue <lntue@google.com>2025-09-12 18:51:24 -0700
committerVitaly Buka <vitalybuka@google.com>2025-09-12 18:51:24 -0700
commit2166ce80fdb0744dbbb67da5cfad5db4668e2b8e (patch)
treeeefcfd57d644659eb9a97e3481e9b9d610f1b8e3
parent8864b35f8cad6f5e0fcb041ca9d560c56edd246e (diff)
parent7aad3830fb208771254b4ae63a01042744471091 (diff)
downloadllvm-users/vitalybuka/spr/main.codegencfi-generalize-transparent-union-parameters.zip
llvm-users/vitalybuka/spr/main.codegencfi-generalize-transparent-union-parameters.tar.gz
llvm-users/vitalybuka/spr/main.codegencfi-generalize-transparent-union-parameters.tar.bz2
Created using spr 1.3.6 [skip ci]
-rw-r--r--.github/dependabot.yml18
-rw-r--r--.github/workflows/docs.yml2
-rw-r--r--.github/workflows/pr-code-format.yml2
-rw-r--r--bolt/include/bolt/Core/MCPlusBuilder.h3
-rw-r--r--bolt/lib/Core/BinaryContext.cpp2
-rw-r--r--bolt/lib/Core/MCPlusBuilder.cpp10
-rw-r--r--bolt/test/AArch64/print-mem-data.test40
-rw-r--r--clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp2
-rw-r--r--clang-tools-extra/clangd/FindTarget.cpp28
-rw-r--r--clang-tools-extra/clangd/SemanticHighlighting.cpp16
-rw-r--r--clang-tools-extra/clangd/unittests/FindTargetTests.cpp3
-rw-r--r--clang-tools-extra/include-cleaner/lib/WalkAST.cpp2
-rw-r--r--clang/cmake/modules/CMakeLists.txt7
-rw-r--r--clang/docs/ClangFormatStyleOptions.rst107
-rw-r--r--clang/docs/OpenMPSupport.rst87
-rw-r--r--clang/docs/ReleaseNotes.rst6
-rw-r--r--clang/include/clang/AST/ASTContext.h13
-rw-r--r--clang/include/clang/AST/ASTNodeTraverser.h5
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h16
-rw-r--r--clang/include/clang/AST/TemplateName.h4
-rw-r--r--clang/include/clang/AST/TypeBase.h64
-rw-r--r--clang/include/clang/AST/TypeLoc.h128
-rw-r--r--clang/include/clang/AST/TypeProperties.td35
-rw-r--r--clang/include/clang/ASTMatchers/ASTMatchers.h12
-rw-r--r--clang/include/clang/Basic/TypeNodes.td10
-rw-r--r--clang/include/clang/Basic/arm_sme.td14
-rw-r--r--clang/include/clang/Format/Format.h68
-rw-r--r--clang/include/clang/Interpreter/Interpreter.h4
-rw-r--r--clang/include/clang/Sema/HeuristicResolver.h2
-rw-r--r--clang/include/clang/Sema/Sema.h7
-rw-r--r--clang/include/clang/Serialization/TypeBitCodes.def1
-rw-r--r--clang/lib/AST/ASTContext.cpp155
-rw-r--r--clang/lib/AST/ASTImporter.cpp35
-rw-r--r--clang/lib/AST/ASTStructuralEquivalence.cpp14
-rw-r--r--clang/lib/AST/ASTTypeTraits.cpp4
-rw-r--r--clang/lib/AST/DeclTemplate.cpp2
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp51
-rw-r--r--clang/lib/AST/MicrosoftMangle.cpp6
-rw-r--r--clang/lib/AST/ODRHash.cpp10
-rw-r--r--clang/lib/AST/TemplateName.cpp18
-rw-r--r--clang/lib/AST/Type.cpp51
-rw-r--r--clang/lib/AST/TypeLoc.cpp32
-rw-r--r--clang/lib/AST/TypePrinter.cpp17
-rw-r--r--clang/lib/ASTMatchers/ASTMatchersInternal.cpp2
-rw-r--r--clang/lib/ASTMatchers/Dynamic/Registry.cpp1
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp8
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/ARM.cpp26
-rw-r--r--clang/lib/Format/CMakeLists.txt1
-rw-r--r--clang/lib/Format/Format.cpp29
-rw-r--r--clang/lib/Format/NumericLiteralCaseFixer.cpp177
-rw-r--r--clang/lib/Format/NumericLiteralCaseFixer.h32
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp7
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp2
-rw-r--r--clang/lib/Headers/f16cintrin.h16
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp2
-rw-r--r--clang/lib/Sema/HeuristicResolver.cpp16
-rw-r--r--clang/lib/Sema/SemaAttr.cpp4
-rw-r--r--clang/lib/Sema/SemaCXXScopeSpec.cpp57
-rw-r--r--clang/lib/Sema/SemaCoroutine.cpp6
-rw-r--r--clang/lib/Sema/SemaDecl.cpp6
-rw-r--r--clang/lib/Sema/SemaDeclCXX.cpp8
-rw-r--r--clang/lib/Sema/SemaExpr.cpp5
-rw-r--r--clang/lib/Sema/SemaLookup.cpp16
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp296
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp49
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp5
-rw-r--r--clang/lib/Sema/SemaType.cpp9
-rw-r--r--clang/lib/Sema/TreeTransform.h153
-rw-r--r--clang/lib/Serialization/ASTReader.cpp14
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp13
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h3
-rw-r--r--clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp6
-rw-r--r--clang/lib/Tooling/Syntax/BuildTree.cpp7
-rw-r--r--clang/test/AST/ast-dump-templates.cpp703
-rw-r--r--clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm28
-rw-r--r--clang/test/Analysis/ctu-import-type-decl-definition.c16
-rw-r--r--clang/test/C/C11/n1285_1.c42
-rw-r--r--clang/test/C/C2y/n3254.c2
-rw-r--r--clang/test/CodeGen/AArch64/fp8-init-list.c10
-rw-r--r--clang/test/CodeGen/AArch64/ls64-inline-asm.c52
-rw-r--r--clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c42
-rw-r--r--clang/test/CodeGen/LoongArch/lasx/builtin-alias.c7658
-rw-r--r--clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c43
-rw-r--r--clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c43
-rw-r--r--clang/test/CodeGen/LoongArch/lasx/builtin.c7655
-rw-r--r--clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c87
-rw-r--r--clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c193
-rw-r--r--clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c919
-rw-r--r--clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c47
-rw-r--r--clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c195
-rw-r--r--clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c109
-rw-r--r--clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c123
-rw-r--r--clang/test/CodeGen/SystemZ/builtins-systemz-i128.c124
-rw-r--r--clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c186
-rw-r--r--clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c186
-rw-r--r--clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c168
-rw-r--r--clang/test/CodeGen/SystemZ/zvector2.c116
-rwxr-xr-x[-rw-r--r--]clang/test/CodeGen/X86/f16c-builtins.c14
-rw-r--r--clang/test/CodeGen/allow-ubsan-check.c26
-rw-r--r--clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c255
-rw-r--r--clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c88
-rw-r--r--clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c135
-rw-r--r--clang/test/CodeGen/attr-counted-by-for-pointers.c240
-rw-r--r--clang/test/CodeGen/attr-counted-by-pr110385.c30
-rw-r--r--clang/test/CodeGen/attr-counted-by.c1065
-rw-r--r--clang/test/CodeGen/builtin-maxnum-minnum.c92
-rw-r--r--clang/test/CodeGen/cleanup-destslot-simple.c174
-rw-r--r--clang/test/CodeGen/isfpclass.c101
-rw-r--r--clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c84
-rw-r--r--clang/test/CodeGen/math-libcalls-tbaa.c241
-rw-r--r--clang/test/CodeGen/sanitize-metadata-nosanitize.c52
-rw-r--r--clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp439
-rw-r--r--clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp281
-rw-r--r--clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp417
-rw-r--r--clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp34
-rw-r--r--clang/test/CodeGenCXX/inline-then-fold-variadics.cpp111
-rw-r--r--clang/test/CodeGenCXX/load-reference-metadata.cpp52
-rw-r--r--clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl65
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl178
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-printf.cl42
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl106
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl106
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl108
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl106
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl106
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl79
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl79
-rw-r--r--clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl8
-rw-r--r--clang/test/CodeGenOpenCL/preserve_vec3.cl46
-rw-r--r--clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp12
-rw-r--r--clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c22
-rw-r--r--clang/test/Driver/print-supported-extensions-riscv.c1
-rw-r--r--clang/test/Headers/__clang_hip_math.hip9591
-rw-r--r--clang/test/Headers/wasm.c2395
-rw-r--r--clang/test/Lexer/cxx-features.cpp4
-rw-r--r--clang/test/OpenMP/bug54082.c42
-rw-r--r--clang/test/OpenMP/bug56913.c34
-rw-r--r--clang/test/OpenMP/bug57757.cpp44
-rw-r--r--clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp997
-rw-r--r--clang/test/OpenMP/parallel_if_codegen_PR51349.cpp75
-rw-r--r--clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp275
-rw-r--r--clang/test/PCH/leakfiles.test11
-rw-r--r--clang/test/Preprocessor/riscv-target-features.c9
-rw-r--r--clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp1
-rw-r--r--clang/tools/libclang/CIndex.cpp13
-rw-r--r--clang/tools/libclang/CXIndexDataConsumer.cpp2
-rw-r--r--clang/unittests/AST/ASTImporterTest.cpp7
-rw-r--r--clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp2
-rw-r--r--clang/unittests/Format/CMakeLists.txt1
-rw-r--r--clang/unittests/Format/NumericLiteralCaseTest.cpp346
-rwxr-xr-xclang/www/cxx_status.html7
-rw-r--r--compiler-rt/lib/builtins/aarch64/sme-abi.S10
-rw-r--r--compiler-rt/test/lit.common.cfg.py6
-rw-r--r--flang/include/flang/Lower/AbstractConverter.h3
-rw-r--r--flang/include/flang/Lower/OpenMP/Clauses.h2
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h9
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/Coarray.h10
-rw-r--r--flang/include/flang/Parser/dump-parse-tree.h2
-rw-r--r--flang/include/flang/Parser/openmp-utils.h2
-rw-r--r--flang/include/flang/Parser/parse-tree.h16
-rw-r--r--flang/include/flang/Semantics/openmp-utils.h1
-rw-r--r--flang/lib/Lower/Bridge.cpp10
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp4
-rw-r--r--flang/lib/Lower/Runtime.cpp97
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp12
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Coarray.cpp61
-rw-r--r--flang/lib/Parser/openmp-parsers.cpp26
-rw-r--r--flang/lib/Parser/openmp-utils.cpp13
-rw-r--r--flang/lib/Semantics/check-omp-structure.cpp20
-rw-r--r--flang/lib/Semantics/openmp-utils.cpp21
-rw-r--r--flang/test/Lower/Coarray/sync_all.f9037
-rw-r--r--flang/test/Lower/Coarray/sync_images.f9062
-rw-r--r--flang/test/Lower/Coarray/sync_memory.f9037
-rw-r--r--flang/test/Parser/OpenMP/replayable-clause.f9060
-rw-r--r--flang/test/Parser/OpenMP/transparent-clause.f9076
-rw-r--r--flang/test/Semantics/OpenMP/replayable-clause.f9022
-rw-r--r--flang/test/Semantics/OpenMP/transparent-clause.f9019
-rw-r--r--libc/src/__support/CMakeLists.txt1
-rw-r--r--libc/src/__support/arg_list.h3
-rw-r--r--libc/src/__support/endian_internal.h12
-rw-r--r--libc/src/__support/macros/config.h2
-rw-r--r--libc/src/stdio/printf_core/CMakeLists.txt1
-rw-r--r--libc/src/stdio/printf_core/float_dec_converter_limited.h5
-rw-r--r--libc/src/stdlib/CMakeLists.txt1
-rw-r--r--libc/src/stdlib/qsort_data.h11
-rw-r--r--libc/src/string/CMakeLists.txt1
-rw-r--r--libc/src/string/memory_utils/CMakeLists.txt1
-rw-r--r--libc/src/string/memory_utils/op_generic.h11
-rw-r--r--libc/src/string/memory_utils/op_x86.h10
-rw-r--r--libc/src/string/memory_utils/utils.h5
-rw-r--r--libc/src/string/stpcpy.cpp3
-rw-r--r--libc/src/string/string_utils.h3
-rw-r--r--libc/src/wchar/CMakeLists.txt1
-rw-r--r--libc/src/wchar/wcpcpy.cpp2
-rw-r--r--libc/src/wchar/wcscpy.cpp2
-rw-r--r--libc/src/wchar/wmemcpy.cpp3
-rw-r--r--libc/src/wchar/wmempcpy.cpp3
-rw-r--r--libc/test/UnitTest/CMakeLists.txt1
-rw-r--r--libc/test/UnitTest/LibcTest.h5
-rw-r--r--libclc/CMakeLists.txt1
-rw-r--r--libclc/utils/CMakeLists.txt4
-rw-r--r--libclc/utils/prepare-builtins.cpp17
-rw-r--r--libcxx/include/__cxx03/fstream43
-rw-r--r--libcxx/include/__cxx03/ios3
-rw-r--r--libcxx/include/__cxx03/sstream32
-rw-r--r--libcxx/include/__cxx03/string4
-rw-r--r--libcxx/include/__split_buffer840
-rw-r--r--libcxx/include/__type_traits/desugars_to.h4
-rw-r--r--libcxx/include/__utility/default_three_way_comparator.h32
-rw-r--r--libcxx/include/__utility/lazy_synth_three_way_comparator.h12
-rw-r--r--libcxx/include/__vector/vector.h67
-rw-r--r--libcxx/include/deque37
-rw-r--r--libcxx/include/string2
-rw-r--r--libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp58
-rw-r--r--libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp24
-rw-r--r--libcxx/test/libcxx/utilities/utility/has_default_three_way.compile.pass.cpp38
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp2
-rw-r--r--libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp2
-rw-r--r--libcxx/utils/ci/Dockerfile6
-rw-r--r--lldb/examples/synthetic/libcxx.py78
-rw-r--r--lldb/include/lldb/Target/StackID.h9
-rw-r--r--lldb/include/lldb/Utility/ArchSpec.h5
-rwxr-xr-xlldb/scripts/framework-header-fix.py4
-rw-r--r--lldb/source/API/SBFrame.cpp2
-rw-r--r--lldb/source/Expression/DWARFExpression.cpp2
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp6
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp6
-rw-r--r--lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp8
-rw-r--r--lldb/source/Target/RegisterContextUnwind.cpp8
-rw-r--r--lldb/source/Target/StackFrameList.cpp2
-rw-r--r--lldb/source/Target/StackID.cpp11
-rw-r--r--lldb/source/Utility/ArchSpec.cpp2
-rw-r--r--lldb/source/Utility/Scalar.cpp5
-rw-r--r--lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile11
-rw-r--r--lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py35
-rw-r--r--lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s226
-rw-r--r--lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py1
-rw-r--r--lldb/unittests/Utility/ScalarTest.cpp6
-rw-r--r--llvm/docs/AMDGPUUsage.rst7
-rw-r--r--llvm/docs/ConvergentOperations.rst22
-rw-r--r--llvm/docs/RISCVUsage.rst3
-rw-r--r--llvm/docs/ReleaseNotes.md1
-rw-r--r--llvm/include/llvm/ADT/PointerIntPair.h3
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h4
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h2
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h7
-rw-r--r--llvm/include/llvm/CodeGen/TargetRegisterInfo.h2
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/ClauseT.h15
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.td20
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAArch64.td9
-rw-r--r--llvm/include/llvm/MC/MCParser/MCAsmParser.h3
-rw-r--r--llvm/include/llvm/ObjectYAML/DXContainerYAML.h31
-rw-r--r--llvm/include/llvm/ProfileData/SampleProf.h101
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfReader.h12
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfWriter.h14
-rw-r--r--llvm/include/llvm/Support/DXILABI.h1
-rw-r--r--llvm/include/llvm/Support/Debug.h5
-rw-r--r--llvm/include/llvm/Support/DebugLog.h280
-rw-r--r--llvm/include/llvm/Support/Endian.h13
-rw-r--r--llvm/include/llvm/Support/HashBuilder.h22
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp4
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp27
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp4
-rw-r--r--llvm/lib/CodeGen/BreakFalseDeps.cpp3
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp2
-rw-r--r--llvm/lib/CodeGen/InitUndef.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp8
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp6
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp11
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp7
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp5
-rw-r--r--llvm/lib/IR/Instruction.cpp7
-rw-r--r--llvm/lib/IR/Intrinsics.cpp8
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp53
-rw-r--r--llvm/lib/MC/MCParser/COFFAsmParser.cpp66
-rw-r--r--llvm/lib/MC/MCParser/COFFMasmParser.cpp14
-rw-r--r--llvm/lib/MC/MCParser/DarwinAsmParser.cpp44
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp32
-rw-r--r--llvm/lib/MC/MCParser/MCAsmParser.cpp9
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp22
-rw-r--r--llvm/lib/MC/MCParser/WasmAsmParser.cpp10
-rw-r--r--llvm/lib/ObjectYAML/DXContainerEmitter.cpp37
-rw-r--r--llvm/lib/ObjectYAML/DXContainerYAML.cpp113
-rw-r--r--llvm/lib/ObjectYAML/GOFFEmitter.cpp4
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp40
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp120
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp80
-rw-r--r--llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp20
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp17
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp13
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.h3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp29
-rw-r--r--llvm/lib/Target/AArch64/MachineSMEABIPass.cpp294
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h19
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp221
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp22
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp211
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h9
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h16
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp39
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td19
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h44
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp7
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp8
-rw-r--r--llvm/lib/Target/ARM/MLxExpansionPass.cpp4
-rw-r--r--llvm/lib/Target/ARM/Thumb1InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp5
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.cpp12
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.h8
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.h3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.h3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp4
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h3
-rw-r--r--llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MSP430RegisterInfo.h5
-rw-r--r--llvm/lib/Target/Mips/Mips16InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips16RegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/Mips/Mips16RegisterInfo.h3
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterInfo.cpp13
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterInfo.h8
-rw-r--r--llvm/lib/Target/Mips/MipsSEInstrInfo.cpp6
-rw-r--r--llvm/lib/Target/Mips/MipsSERegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/Mips/MipsSERegisterInfo.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp10
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp9
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp8
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp17
-rw-r--r--llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td6
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp23
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td44
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp14
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp10
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcRegisterInfo.cpp12
-rw-r--r--llvm/lib/Target/Sparc/SparcRegisterInfo.h11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.h3
-rw-r--r--llvm/lib/Target/VE/VERegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/VE/VERegisterInfo.h3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISD.def1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp180
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td9
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp10
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp27
-rw-r--r--llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp5
-rw-r--r--llvm/lib/Target/X86/X86DomainReassignment.cpp3
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp25
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h3
-rw-r--r--llvm/lib/Target/X86/X86OptimizeLEAs.cpp3
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp13
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.h6
-rw-r--r--llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp7
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp10
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp48
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp11
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp30
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h21
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h8
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp68
-rw-r--r--llvm/test/Analysis/BasicAA/featuretest.ll26
-rw-r--r--llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll26
-rw-r--r--llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll2
-rw-r--r--llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll45
-rw-r--r--llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll50
-rw-r--r--llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll21
-rw-r--r--llvm/test/Bitcode/upgrade-masked-keep-metadata.ll44
-rw-r--r--llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll37
-rw-r--r--llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll79
-rw-r--r--llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/sme-streaming-interface.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll1258
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll194
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll1077
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll170
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll108
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll54
-rw-r--r--llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll390
-rw-r--r--llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll376
-rw-r--r--llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll26
-rw-r--r--llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll216
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll32
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll12
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll14
-rw-r--r--llvm/test/CodeGen/RISCV/O3-pipeline.ll6
-rw-r--r--llvm/test/CodeGen/RISCV/attributes.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/condops.ll19
-rw-r--r--llvm/test/CodeGen/RISCV/features-info.ll1
-rw-r--r--llvm/test/CodeGen/RISCV/machine-combiner.ll43
-rw-r--r--llvm/test/CodeGen/RISCV/neg-abs.ll24
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll2
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll89
-rw-r--r--llvm/test/CodeGen/RISCV/select-zbb.ll720
-rw-r--r--llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/xcvbitmanip.ll47
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll4
-rw-r--r--llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll609
-rw-r--r--llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll65
-rw-r--r--llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll65
-rw-r--r--llvm/test/DebugInfo/unrolled-loop-remainder.ll102
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll10
-rw-r--r--llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll19
-rw-r--r--llvm/test/Instrumentation/TypeSanitizer/anon.ll152
-rw-r--r--llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll43
-rw-r--r--llvm/test/Instrumentation/TypeSanitizer/basic.ll106
-rw-r--r--llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll13
-rw-r--r--llvm/test/MC/RISCV/zibi-invalid.s34
-rw-r--r--llvm/test/MC/RISCV/zibi-valid.s63
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml8
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml8
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml12
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml12
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml4
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml36
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml14
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml14
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml14
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml16
-rw-r--r--llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml28
-rw-r--r--llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td46
-rw-r--r--llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td66
-rw-r--r--llvm/test/TableGen/intrinsic-attrs.td4
-rw-r--r--llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll37
-rw-r--r--llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll52
-rw-r--r--llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll1884
-rw-r--r--llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll91
-rw-r--r--llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll14
-rw-r--r--llvm/test/Transforms/GVN/pr33549.ll45
-rw-r--r--llvm/test/Transforms/GVN/pr64598.ll106
-rw-r--r--llvm/test/Transforms/GVN/tbaa.ll80
-rw-r--r--llvm/test/Transforms/GVNHoist/hoist-md.ll108
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll102
-rw-r--r--llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/assume-align.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/get_active_lane_mask.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/load-no-aliasing.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/loadstore-metadata.ll178
-rw-r--r--llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll42
-rw-r--r--llvm/test/Transforms/InstCombine/trunc-lshr.ll74
-rw-r--r--llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll3
-rw-r--r--llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll20
-rw-r--r--llvm/test/Transforms/JumpThreading/ddt-crash3.ll31
-rw-r--r--llvm/test/Transforms/JumpThreading/thread-loads.ll350
-rw-r--r--llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll32
-rw-r--r--llvm/test/Transforms/LICM/pr50367.ll66
-rw-r--r--llvm/test/Transforms/LICM/scalar-promote.ll545
-rw-r--r--llvm/test/Transforms/LICM/variant-aainfo.ll28
-rw-r--r--llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll70
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll3
-rw-r--r--llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll1402
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll11
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll21
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll233
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll68
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll38
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/pr54634.ll119
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll439
-rw-r--r--llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll10
-rw-r--r--llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll35
-rw-r--r--llvm/test/Transforms/LoopVectorize/metadata.ll56
-rw-r--r--llvm/test/Transforms/LoopVectorize/pointer-induction.ll7
-rw-r--r--llvm/test/Transforms/LoopVectorize/predicate-switch.ll26
-rw-r--r--llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll12
-rw-r--r--llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll77
-rw-r--r--llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll17
-rw-r--r--llvm/test/Transforms/NewGVN/memory-handling.ll150
-rw-r--r--llvm/test/Transforms/NewGVN/pr31501.ll57
-rw-r--r--llvm/test/Transforms/NewGVN/pr33305.ll105
-rw-r--r--llvm/test/Transforms/NewGVN/pr33367.ll66
-rw-r--r--llvm/test/Transforms/NewGVN/pr34452.ll23
-rw-r--r--llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll38
-rw-r--r--llvm/test/Transforms/NewGVN/tbaa.ll28
-rw-r--r--llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll34
-rw-r--r--llvm/test/Transforms/OpenMP/dead_use.ll24
-rw-r--r--llvm/test/Transforms/OpenMP/global_constructor.ll42
-rw-r--r--llvm/test/Transforms/OpenMP/spmdization.ll2310
-rw-r--r--llvm/test/Transforms/OpenMP/spmdization_assumes.ll52
-rw-r--r--llvm/test/Transforms/OpenMP/spmdization_indirect.ll690
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll45
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll134
-rw-r--r--llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll91
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll39
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll179
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll26
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll97
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll105
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll36
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll164
-rw-r--r--llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll106
-rw-r--r--llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll6
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll20
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll17
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll43
-rw-r--r--llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll10
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll39
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll39
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/metadata.ll38
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll31
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll66
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll38
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll457
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll457
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll14
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll36
-rw-r--r--llvm/test/Transforms/SROA/tbaa-struct2.ll13
-rw-r--r--llvm/test/Transforms/SROA/tbaa-struct3.ll80
-rw-r--r--llvm/test/Transforms/SROA/tbaa-subload.ll20
-rw-r--r--llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll274
-rw-r--r--llvm/test/Transforms/Scalarizer/basic.ll281
-rw-r--r--llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll21
-rw-r--r--llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll29
-rw-r--r--llvm/test/Transforms/SimplifyCFG/branch-fold.ll19
-rw-r--r--llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll97
-rw-r--r--llvm/test/Verifier/llvm.loop.estimated_trip_count.ll7
-rw-r--r--llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected44
-rw-r--r--llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext18
-rw-r--r--llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test9
-rw-r--r--llvm/test/tools/llvm-profdata/profile-symbol-list.test9
-rw-r--r--llvm/test/tools/llvm-profdata/roundtrip.test6
-rw-r--r--llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp42
-rw-r--r--llvm/unittests/Support/DebugLogTest.cpp129
-rw-r--r--llvm/unittests/Support/EndianTest.cpp14
-rw-r--r--llvm/unittests/TargetParser/RISCVISAInfoTest.cpp1
-rw-r--r--llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp41
-rw-r--r--llvm/utils/TableGen/DecoderEmitter.cpp47
-rw-r--r--llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn1
-rw-r--r--llvm/utils/lit/CMakeLists.txt2
-rw-r--r--llvm/utils/lit/lit/DiffUpdater.py16
-rw-r--r--llvm/utils/lit/lit/TestRunner.py38
-rw-r--r--llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py25
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg8
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py4
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt1
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt5
-rw-r--r--llvm/utils/lit/tests/diff-test-update.py14
-rw-r--r--llvm/utils/lit/tests/shtest-ulimit.py24
-rw-r--r--llvm/utils/profcheck-xfail.txt1
-rw-r--r--mlir/Maintainers.md2
-rw-r--r--mlir/docs/Tutorials/transform/Ch0.md2
-rw-r--r--mlir/examples/standalone/CMakeLists.txt4
-rw-r--r--mlir/examples/standalone/python/CMakeLists.txt4
-rw-r--r--mlir/examples/standalone/test/CMakeLists.txt4
-rw-r--r--mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir1
-rw-r--r--mlir/examples/standalone/test/Standalone/standalone-plugin.mlir1
-rw-r--r--mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td3
-rw-r--r--mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h3
-rw-r--r--mlir/include/mlir/Dialect/SCF/IR/SCFOps.td4
-rw-r--r--mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td15
-rw-r--r--mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td14
-rw-r--r--mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td67
-rw-r--r--mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td4
-rw-r--r--mlir/include/mlir/IR/Remarks.h2
-rw-r--r--mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h2
-rw-r--r--mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td61
-rw-r--r--mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h44
-rw-r--r--mlir/include/mlir/Transforms/DialectConversion.h3
-rw-r--r--mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp15
-rw-r--r--mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp11
-rw-r--r--mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp34
-rw-r--r--mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp2
-rw-r--r--mlir/lib/Dialect/ArmSME/IR/Utils.cpp15
-rw-r--r--mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp12
-rw-r--r--mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp36
-rw-r--r--mlir/lib/Dialect/SCF/IR/SCF.cpp54
-rw-r--r--mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp10
-rw-r--r--mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp9
-rw-r--r--mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp115
-rw-r--r--mlir/lib/Dialect/Tensor/IR/TensorOps.cpp28
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp4
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp11
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp8
-rw-r--r--mlir/lib/Dialect/Transform/IR/TransformOps.cpp12
-rw-r--r--mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp5
-rw-r--r--mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp46
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp20
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp341
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp186
-rw-r--r--mlir/lib/IR/Remarks.cpp47
-rw-r--r--mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp4
-rw-r--r--mlir/lib/Pass/Pass.cpp244
-rw-r--r--mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp22
-rw-r--r--mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp287
-rw-r--r--mlir/lib/Target/SPIRV/Deserialization/Deserializer.h51
-rw-r--r--mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp122
-rw-r--r--mlir/lib/Target/SPIRV/Serialization/Serializer.cpp80
-rw-r--r--mlir/lib/Target/SPIRV/Serialization/Serializer.h39
-rw-r--r--mlir/lib/Tools/mlir-opt/CMakeLists.txt1
-rw-r--r--mlir/lib/Tools/mlir-opt/MlirOptMain.cpp108
-rw-r--r--mlir/lib/Transforms/Utils/DialectConversion.cpp232
-rwxr-xr-x[-rw-r--r--]mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir97
-rw-r--r--mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir17
-rwxr-xr-x[-rw-r--r--]mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir14
-rw-r--r--mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir50
-rw-r--r--mlir/test/Dialect/Arith/int-range-opts.mlir16
-rw-r--r--mlir/test/Dialect/Linalg/data-layout-propagation.mlir30
-rw-r--r--mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir3
-rw-r--r--mlir/test/Dialect/SCF/invalid.mlir2
-rw-r--r--mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir108
-rw-r--r--mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir30
-rw-r--r--mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir8
-rw-r--r--mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir22
-rw-r--r--mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir11
-rw-r--r--mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir4
-rw-r--r--mlir/test/Dialect/XeGPU/subgroup-distribute.mlir113
-rw-r--r--mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir59
-rw-r--r--mlir/test/Examples/standalone/lit.local.cfg1
-rw-r--r--mlir/test/Examples/standalone/test.toy3
-rw-r--r--mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir3
-rw-r--r--mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir3
-rw-r--r--mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir3
-rw-r--r--mlir/test/Pass/remarks.mlir28
-rw-r--r--mlir/test/Target/LLVMIR/arm-sme-invalid.mlir2
-rw-r--r--mlir/test/Target/LLVMIR/arm-sme.mlir6
-rw-r--r--mlir/test/Target/SPIRV/graph-ops.mlir25
-rw-r--r--mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp9
-rw-r--r--mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp32
-rw-r--r--mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp3
-rw-r--r--mlir/test/lib/Pass/CMakeLists.txt1
-rw-r--r--mlir/test/lib/Pass/TestRemarksPass.cpp74
-rw-r--r--mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp2
-rw-r--r--mlir/test/lit.site.cfg.py.in1
-rw-r--r--mlir/tools/mlir-opt/mlir-opt.cpp2
-rw-r--r--mlir/unittests/IR/CMakeLists.txt2
-rw-r--r--mlir/unittests/IR/RemarkTest.cpp9
-rw-r--r--utils/bazel/WORKSPACE20
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel4
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel3
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel36
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/tblgen.bzl1
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel2
722 files changed, 42911 insertions, 26925 deletions
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
deleted file mode 100644
index 8c1dfd3..0000000
--- a/.github/dependabot.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-version: 2
-updates:
- - package-ecosystem: "github-actions"
- directory: "/"
- schedule:
- interval: "monthly"
- groups:
- github-actions:
- patterns:
- - "*"
- - package-ecosystem: "pip"
- directory: "/llvm/docs"
- schedule:
- interval: "monthly"
- groups:
- llvm-docs-requirements:
- patterns:
- - "*"
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index b627803..8cdd39c 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -60,7 +60,7 @@ jobs:
fetch-depth: 2
- name: Get subprojects that have doc changes
id: docs-changed-subprojects
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
+ uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
skip_initial_fetch: true
base_sha: 'HEAD~1'
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 9341eaf..9396bf0 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -25,7 +25,7 @@ jobs:
- name: Get changed files
id: changed-files
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
+ uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
separator: ","
skip_initial_fetch: true
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 1c630ab..90129d4 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -2216,7 +2216,8 @@ public:
}
/// Print each annotation attached to \p Inst.
- void printAnnotations(const MCInst &Inst, raw_ostream &OS) const;
+ void printAnnotations(const MCInst &Inst, raw_ostream &OS,
+ bool PrintMemData = false) const;
/// Remove annotation with a given \p Index.
///
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 6d16edf..8e2224b 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2044,7 +2044,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
if (MCSymbol *Label = MIB->getInstLabel(Instruction))
OS << " # Label: " << *Label;
- MIB->printAnnotations(Instruction, OS);
+ MIB->printAnnotations(Instruction, OS, PrintMemData || opts::PrintMemData);
if (opts::PrintDebugInfo)
printDebugInfo(OS, Instruction, Function, DwCtx.get());
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index 7f962e1..5247522 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -378,8 +378,8 @@ void MCPlusBuilder::stripAnnotations(MCInst &Inst, bool KeepTC) const {
setTailCall(Inst);
}
-void MCPlusBuilder::printAnnotations(const MCInst &Inst,
- raw_ostream &OS) const {
+void MCPlusBuilder::printAnnotations(const MCInst &Inst, raw_ostream &OS,
+ bool PrintMemData) const {
std::optional<unsigned> FirstAnnotationOp = getFirstAnnotationOpIndex(Inst);
if (!FirstAnnotationOp)
return;
@@ -390,7 +390,11 @@ void MCPlusBuilder::printAnnotations(const MCInst &Inst,
const int64_t Value = extractAnnotationValue(Imm);
const auto *Annotation = reinterpret_cast<const MCAnnotation *>(Value);
if (Index >= MCAnnotation::kGeneric) {
- OS << " # " << AnnotationNames[Index - MCAnnotation::kGeneric] << ": ";
+ std::string AnnotationName =
+ AnnotationNames[Index - MCAnnotation::kGeneric];
+ if (!PrintMemData && AnnotationName == "MemoryAccessProfile")
+ continue;
+ OS << " # " << AnnotationName << ": ";
Annotation->print(OS);
}
}
diff --git a/bolt/test/AArch64/print-mem-data.test b/bolt/test/AArch64/print-mem-data.test
new file mode 100644
index 0000000..09d4f46
--- /dev/null
+++ b/bolt/test/AArch64/print-mem-data.test
@@ -0,0 +1,40 @@
+# Check that --print-mem-data option works properly in llvm-bolt
+
+# RUN: split-file %s %t
+# RUN: %clang %cflags -fPIC -pie %t/main.s -o %t.exe -nostdlib -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-mem-data=true --print-cfg \
+# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-PRINT
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \
+# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-DEFAULT
+
+# CHECK-PRINT: ldr w2, [x1], #0x4 # MemoryAccessProfile: 7 total counts :
+# CHECK-PRINT-NEXT: { 0x123: 1 },
+# CHECK-PRINT-NEXT: { 0x456: 2 },
+# CHECK-PRINT-NEXT: { 0xabc: 4 }
+# CHECK-DEFAULT-NOT: MemoryAccessProfile
+
+#--- main.s
+ .text
+ .align 4
+ .global main
+ .type main, %function
+main:
+ sub sp, sp, #48
+ add x1, sp, 8
+ add x3, sp, 48
+ mov w0, 0
+.L2:
+ ldr w2, [x1], 4
+ add w0, w0, w2
+ cmp x1, x3
+ bne .L2
+ add sp, sp, 48
+ ret
+ .size main, .-main
+
+# The three memory access data generated by the load at
+# offset 0x10 in the main.
+#--- fdata
+4 main 10 4 otherSym 123 1
+4 main 10 4 otherSym 456 2
+4 main 10 4 otherSym abc 4
diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
index 90539ea..24d346b 100644
--- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
@@ -350,6 +350,8 @@ public:
const TemplateDecl *Decl =
Loc.getTypePtr()->getTemplateName().getAsTemplateDecl(
/*IgnoreDeduced=*/true);
+ if (!Decl)
+ return true;
if (const auto *ClassDecl = dyn_cast<TemplateDecl>(Decl))
if (const NamedDecl *TemplDecl = ClassDecl->getTemplatedDecl())
diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp
index 32018d1..8aae414 100644
--- a/clang-tools-extra/clangd/FindTarget.cpp
+++ b/clang-tools-extra/clangd/FindTarget.cpp
@@ -406,15 +406,6 @@ public:
}
}
}
- void VisitDependentTemplateSpecializationType(
- const DependentTemplateSpecializationType *DTST) {
- if (Outer.Resolver) {
- for (const NamedDecl *ND :
- Outer.Resolver->resolveTemplateSpecializationType(DTST)) {
- Outer.add(ND, Flags);
- }
- }
- }
void VisitTypedefType(const TypedefType *TT) {
if (shouldSkipTypedef(TT->getDecl()))
return;
@@ -455,11 +446,13 @@ public:
// class template specializations have a (specialized) CXXRecordDecl.
else if (const CXXRecordDecl *RD = TST->getAsCXXRecordDecl())
Outer.add(RD, Flags); // add(Decl) will despecialize if needed.
- else {
+ else if (auto *TD = TST->getTemplateName().getAsTemplateDecl())
// fallback: the (un-specialized) declaration from primary template.
- if (auto *TD = TST->getTemplateName().getAsTemplateDecl())
- Outer.add(TD->getTemplatedDecl(), Flags | Rel::TemplatePattern);
- }
+ Outer.add(TD->getTemplatedDecl(), Flags | Rel::TemplatePattern);
+ else if (Outer.Resolver)
+ for (const NamedDecl *ND :
+ Outer.Resolver->resolveTemplateSpecializationType(TST))
+ Outer.add(ND, Flags);
}
void
VisitSubstTemplateTypeParmType(const SubstTemplateTypeParmType *STTPT) {
@@ -900,15 +893,6 @@ refInTypeLoc(TypeLoc L, const HeuristicResolver *Resolver) {
DeclRelation::Alias, Resolver)});
}
- void VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc L) {
- Refs.push_back(
- ReferenceLoc{L.getQualifierLoc(), L.getTemplateNameLoc(),
- /*IsDecl=*/false,
- explicitReferenceTargets(
- DynTypedNode::create(L.getType()), {}, Resolver)});
- }
-
void VisitDependentNameTypeLoc(DependentNameTypeLoc L) {
Refs.push_back(
ReferenceLoc{L.getQualifierLoc(), L.getNameLoc(),
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 2b151b1..ab720ebe 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -728,11 +728,6 @@ public:
return true;
}
- bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
- H.addAngleBracketTokens(L.getLAngleLoc(), L.getRAngleLoc());
- return true;
- }
-
bool VisitFunctionDecl(FunctionDecl *D) {
if (D->isOverloadedOperator()) {
const auto AddOpDeclToken = [&](SourceLocation Loc) {
@@ -1087,11 +1082,12 @@ public:
return true;
}
- bool VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc L) {
- H.addToken(L.getTemplateNameLoc(), HighlightingKind::Type)
- .addModifier(HighlightingModifier::DependentName)
- .addModifier(HighlightingModifier::ClassScope);
+ bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
+ if (!L.getTypePtr()->getTemplateName().getAsTemplateDecl(
+ /*IgnoreDeduced=*/true))
+ H.addToken(L.getTemplateNameLoc(), HighlightingKind::Type)
+ .addModifier(HighlightingModifier::DependentName)
+ .addModifier(HighlightingModifier::ClassScope);
H.addAngleBracketTokens(L.getLAngleLoc(), L.getRAngleLoc());
return true;
}
diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
index f369e1b..dd26182 100644
--- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
@@ -1029,8 +1029,7 @@ TEST_F(TargetDeclTest, DependentTypes) {
template <typename T>
void foo(typename A<T>::template [[B]]<int>);
)cpp";
- EXPECT_DECLS("DependentTemplateSpecializationTypeLoc",
- "template <typename> struct B");
+ EXPECT_DECLS("TemplateSpecializationTypeLoc", "template <typename> struct B");
// Dependent name with recursive definition. We don't expect a
// result, but we shouldn't get into a stack overflow either.
diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index 0cbf9a0..7bbdc8b 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -321,6 +321,8 @@ public:
// TypeLoc visitors.
void reportType(SourceLocation RefLoc, NamedDecl *ND) {
+ if (!ND)
+ return;
// Reporting explicit references to types nested inside classes can cause
// issues, e.g. a type accessed through a derived class shouldn't require
// inclusion of the base.
diff --git a/clang/cmake/modules/CMakeLists.txt b/clang/cmake/modules/CMakeLists.txt
index d2d6812..90fbd88 100644
--- a/clang/cmake/modules/CMakeLists.txt
+++ b/clang/cmake/modules/CMakeLists.txt
@@ -8,15 +8,14 @@ include(FindPrefixFromConfig)
# the usual CMake convention seems to be ${Project}Targets.cmake.
set(CLANG_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/clang" CACHE STRING
"Path for CMake subdirectory for Clang (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/clang')")
-# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
-set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang")
# Keep this in sync with llvm/cmake/CMakeLists.txt!
set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING
"Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')")
# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
-string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_cmake_builddir "${LLVM_LIBRARY_DIR}")
-set(llvm_cmake_builddir "${llvm_cmake_builddir}/cmake/llvm")
+string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_builddir "${LLVM_LIBRARY_DIR}")
+set(llvm_cmake_builddir "${llvm_builddir}/cmake/llvm")
+set(clang_cmake_builddir "${llvm_builddir}/cmake/clang")
get_property(CLANG_EXPORTS GLOBAL PROPERTY CLANG_EXPORTS)
export(TARGETS ${CLANG_EXPORTS} FILE ${clang_cmake_builddir}/ClangTargets.cmake)
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 3ac9e37..6be4d51 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -5079,6 +5079,113 @@ the configuration (without a prefix: ``Auto``).
For example: TESTSUITE
+.. _NumericLiteralCase:
+
+**NumericLiteralCase** (``NumericLiteralCaseStyle``) :versionbadge:`clang-format 22` :ref:`¶ <NumericLiteralCase>`
+ Capitalization style for numeric literals.
+
+ Nested configuration flags:
+
+ Separate control for each numeric literal component.
+
+ For example, the config below will leave exponent letters alone, reformat
+ hexadecimal digits in lowercase, reformat numeric literal prefixes in
+ uppercase, and reformat suffixes in lowercase.
+
+ .. code-block:: c++
+
+ NumericLiteralCase:
+ ExponentLetter: Leave
+ HexDigit: Lower
+ Prefix: Upper
+ Suffix: Lower
+
+ * ``NumericLiteralComponentStyle ExponentLetter``
+ Format floating point exponent separator letter case.
+
+ .. code-block:: c++
+
+ float a = 6.02e23 + 1.0E10; // Leave
+ float a = 6.02E23 + 1.0E10; // Upper
+ float a = 6.02e23 + 1.0e10; // Lower
+
+ Possible values:
+
+ * ``NLCS_Leave`` (in configuration: ``Leave``)
+ Leave this component of the literal as is.
+
+ * ``NLCS_Upper`` (in configuration: ``Upper``)
+ Format this component with uppercase characters.
+
+ * ``NLCS_Lower`` (in configuration: ``Lower``)
+ Format this component with lowercase characters.
+
+
+ * ``NumericLiteralComponentStyle HexDigit``
+ Format hexadecimal digit case.
+
+ .. code-block:: c++
+
+ a = 0xaBcDeF; // Leave
+ a = 0xABCDEF; // Upper
+ a = 0xabcdef; // Lower
+
+ Possible values:
+
+ * ``NLCS_Leave`` (in configuration: ``Leave``)
+ Leave this component of the literal as is.
+
+ * ``NLCS_Upper`` (in configuration: ``Upper``)
+ Format this component with uppercase characters.
+
+ * ``NLCS_Lower`` (in configuration: ``Lower``)
+ Format this component with lowercase characters.
+
+
+ * ``NumericLiteralComponentStyle Prefix``
+ Format integer prefix case.
+
+ .. code-block:: c++
+
+ a = 0XF0 | 0b1; // Leave
+ a = 0XF0 | 0B1; // Upper
+ a = 0xF0 | 0b1; // Lower
+
+ Possible values:
+
+ * ``NLCS_Leave`` (in configuration: ``Leave``)
+ Leave this component of the literal as is.
+
+ * ``NLCS_Upper`` (in configuration: ``Upper``)
+ Format this component with uppercase characters.
+
+ * ``NLCS_Lower`` (in configuration: ``Lower``)
+ Format this component with lowercase characters.
+
+
+ * ``NumericLiteralComponentStyle Suffix``
+ Format suffix case. This option excludes case-sensitive reserved
+ suffixes, such as ``min`` in C++.
+
+ .. code-block:: c++
+
+ a = 1uLL; // Leave
+ a = 1ULL; // Upper
+ a = 1ull; // Lower
+
+ Possible values:
+
+ * ``NLCS_Leave`` (in configuration: ``Leave``)
+ Leave this component of the literal as is.
+
+ * ``NLCS_Upper`` (in configuration: ``Upper``)
+ Format this component with uppercase characters.
+
+ * ``NLCS_Lower`` (in configuration: ``Lower``)
+ Format this component with lowercase characters.
+
+
+
.. _ObjCBinPackProtocolList:
**ObjCBinPackProtocolList** (``BinPackStyle``) :versionbadge:`clang-format 7` :ref:`¶ <ObjCBinPackProtocolList>`
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index cb8ea5e..47a8109a 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -348,12 +348,97 @@ implementation.
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
+.. _OpenMP 5.2 implementation details:
+
+OpenMP 5.2 Implementation Details
+=================================
+
+The following table provides a quick overview of various OpenMP 5.2 features
+and their implementation status. Please post on the
+`Discourse forums (Runtimes - OpenMP category)`_ for more
+information or if you want to help with the
+implementation.
+
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+|Feature | C/C++ Status | Fortran Status | Reviews |
++=============================================================+===========================+===========================+==========================================================================+
+| omp_in_explicit_task() | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| semantics of explicit_task_var and implicit_task_var | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| ompx sentinel for C/C++ directive extensions | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| ompx prefix for clause extensions | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| if clause on teams construct | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| step modifier added | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| memspace and traits modifiers to uses_allocators            | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Add otherwise clause to metadirectives | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| doacross clause with support for omp_cur_iteration | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| position of interop_type in init clause on interop          | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| implicit map type for target enter/exit data | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| work OMPT type for work-sharing loop constructs | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| allocate and firstprivate on scope directive | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Change loop consistency for order clause | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Add memspace and traits modifiers to uses_allocators | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Keep original base pointer on map w/o matched candidate | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Pure procedure support for certain directives | :none:`N/A` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| ALLOCATE statement support for allocators | :none:`N/A` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| dispatch construct extension to support end directive | :none:`N/A` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+|OpenMP 5.2 Deprecations | C/C++ Status | Fortran Status | Reviews |
++=============================================================+===========================+===========================+==========================================================================+
+| Linear clause syntax | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The minus operator | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| Map clause modifiers without commas | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The use of allocate directives with ALLOCATE statement | :good:`N/A` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| uses_allocators list syntax | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The default clause on metadirectives | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The delimited form of the declare target directive | :none:`unclaimed` | :good:`N/A` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The use of the to clause on the declare target directive | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| The syntax of the destroy clause on the depobj construct | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| keyword source and sink as task-dependence modifiers | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| interop types in any position on init clause of interop | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| ompd prefix usage for some ICVs | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+
.. _OpenMP 6.0 implementation details:
OpenMP 6.0 Implementation Details
=================================
-The following table provides a quick overview over various OpenMP 6.0 features
+The following table provides a quick overview of various OpenMP 6.0 features
and their implementation status. Please post on the
`Discourse forums (Runtimes - OpenMP category)`_ for more
information or if you want to help with the
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 060f3d9..51e5973 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -458,7 +458,9 @@ AST Matchers
following the corresponding changes in the clang AST.
- Ensure ``hasBitWidth`` doesn't crash on bit widths that are dependent on template
parameters.
-
+- Remove the ``dependentTemplateSpecializationType`` matcher, as the
+ corresponding AST node was removed. This matcher was never very useful, since
+ there was no way to match on its template name.
- Add a boolean member ``IgnoreSystemHeaders`` to ``MatchFinderOptions``. This
allows it to ignore nodes in system headers when traversing the AST.
@@ -468,6 +470,8 @@ AST Matchers
clang-format
------------
- Add ``SpaceInEmptyBraces`` option and set it to ``Always`` for WebKit style.
+- Add ``NumericLiteralCase`` option for enforcing character case in numeric
+ literals.
libclang
--------
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 1c17333..b8f6de6 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -241,9 +241,6 @@ class ASTContext : public RefCountedBase<ASTContext> {
mutable llvm::FoldingSet<UsingType> UsingTypes;
mutable llvm::FoldingSet<FoldingSetPlaceholder<TypedefType>> TypedefTypes;
mutable llvm::FoldingSet<DependentNameType> DependentNameTypes;
- mutable llvm::DenseMap<llvm::FoldingSetNodeID,
- DependentTemplateSpecializationType *>
- DependentTemplateSpecializationTypes;
mutable llvm::FoldingSet<PackExpansionType> PackExpansionTypes;
mutable llvm::FoldingSet<ObjCObjectTypeImpl> ObjCObjectTypes;
mutable llvm::FoldingSet<ObjCObjectPointerType> ObjCObjectPointerTypes;
@@ -1904,7 +1901,8 @@ public:
TemplateTypeParmDecl *ParmDecl = nullptr) const;
QualType getCanonicalTemplateSpecializationType(
- TemplateName T, ArrayRef<TemplateArgument> CanonicalArgs) const;
+ ElaboratedTypeKeyword Keyword, TemplateName T,
+ ArrayRef<TemplateArgument> CanonicalArgs) const;
QualType
getTemplateSpecializationType(ElaboratedTypeKeyword Keyword, TemplateName T,
@@ -1935,13 +1933,6 @@ public:
NestedNameSpecifier NNS,
const IdentifierInfo *Name) const;
- QualType getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgumentLoc> Args) const;
- QualType getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args, bool IsCanonical = false) const;
-
TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl) const;
/// Form a pack expansion type with the given pattern.
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index fe08d637..ea68cc7 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -533,11 +533,6 @@ public:
for (unsigned I=0, N=TL.getNumArgs(); I < N; ++I)
dumpTemplateArgumentLoc(TL.getArgLoc(I));
}
- void VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc TL) {
- for (unsigned I=0, N=TL.getNumArgs(); I < N; ++I)
- dumpTemplateArgumentLoc(TL.getArgLoc(I));
- }
void VisitTypedefDecl(const TypedefDecl *D) { Visit(D->getUnderlyingType()); }
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 02581c8..c194448 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -1192,13 +1192,6 @@ DEF_TRAVERSE_TYPE(DependentNameType, {
TRY_TO(TraverseNestedNameSpecifier(T->getQualifier()));
})
-DEF_TRAVERSE_TYPE(DependentTemplateSpecializationType, {
- const DependentTemplateStorage &S = T->getDependentTemplateName();
- if (TraverseQualifier)
- TRY_TO(TraverseNestedNameSpecifier(S.getQualifier()));
- TRY_TO(TraverseTemplateArguments(T->template_arguments()));
-})
-
DEF_TRAVERSE_TYPE(TemplateSpecializationType, {
if (TraverseQualifier) {
TRY_TO(TraverseTemplateName(T->getTemplateName()));
@@ -1546,15 +1539,6 @@ DEF_TRAVERSE_TYPELOC(DependentNameType, {
TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc()));
})
-DEF_TRAVERSE_TYPELOC(DependentTemplateSpecializationType, {
- if (TraverseQualifier)
- TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc()));
-
- for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) {
- TRY_TO(TraverseTemplateArgumentLoc(TL.getArgLoc(I)));
- }
-})
-
DEF_TRAVERSE_TYPELOC(TemplateSpecializationType, {
if (TraverseQualifier)
TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc()));
diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h
index abb0669..b6999a1 100644
--- a/clang/include/clang/AST/TemplateName.h
+++ b/clang/include/clang/AST/TemplateName.h
@@ -297,10 +297,10 @@ public:
/// set of function templates, returns NULL.
TemplateDecl *getAsTemplateDecl(bool IgnoreDeduced = false) const;
- /// Retrieves the underlying template declaration that
+ /// Retrieves the underlying template name that
/// this template name refers to, along with the
/// deduced default arguments, if any.
- std::pair<TemplateDecl *, DefaultArguments>
+ std::pair<TemplateName, DefaultArguments>
getTemplateDeclAndDefaultArgs() const;
/// Retrieve the underlying, overloaded function template
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index db2ab04..9074992 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -2250,22 +2250,6 @@ protected:
unsigned NumArgs;
};
- class DependentTemplateSpecializationTypeBitfields {
- friend class DependentTemplateSpecializationType;
-
- LLVM_PREFERRED_TYPE(KeywordWrapperBitfields)
- unsigned : NumTypeWithKeywordBits;
-
- /// The number of template arguments named in this class template
- /// specialization, which is expected to be able to hold at least 1024
- /// according to [implimits]. However, as this limit is somewhat easy to
- /// hit with template metaprogramming we'd prefer to keep it as large
- /// as possible. At the moment it has been left as a non-bitfield since
- /// this type safely fits in 64 bits as an unsigned, so there is no reason
- /// to introduce the performance impact of a bitfield.
- unsigned NumArgs;
- };
-
class PackExpansionTypeBitfields {
friend class PackExpansionType;
@@ -2346,8 +2330,6 @@ protected:
SubstTemplateTypeParmTypeBitfields SubstTemplateTypeParmTypeBits;
SubstPackTypeBitfields SubstPackTypeBits;
TemplateSpecializationTypeBitfields TemplateSpecializationTypeBits;
- DependentTemplateSpecializationTypeBitfields
- DependentTemplateSpecializationTypeBits;
PackExpansionTypeBitfields PackExpansionTypeBits;
CountAttributedTypeBitfields CountAttributedTypeBits;
PresefinedSugarTypeBitfields PredefinedSugarTypeBits;
@@ -7366,9 +7348,9 @@ public:
}
void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx);
- static void Profile(llvm::FoldingSetNodeID &ID, TemplateName T,
- ArrayRef<TemplateArgument> Args, QualType Underlying,
- const ASTContext &Context);
+ static void Profile(llvm::FoldingSetNodeID &ID, ElaboratedTypeKeyword Keyword,
+ TemplateName T, ArrayRef<TemplateArgument> Args,
+ QualType Underlying, const ASTContext &Context);
static bool classof(const Type *T) {
return T->getTypeClass() == TemplateSpecialization;
@@ -7459,46 +7441,6 @@ public:
}
};
-/// Represents a template specialization type whose template cannot be
-/// resolved, e.g.
-/// A<T>::template B<T>
-class DependentTemplateSpecializationType : public TypeWithKeyword {
- friend class ASTContext; // ASTContext creates these
-
- DependentTemplateStorage Name;
-
- DependentTemplateSpecializationType(ElaboratedTypeKeyword Keyword,
- const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args,
- QualType Canon);
-
-public:
- const DependentTemplateStorage &getDependentTemplateName() const {
- return Name;
- }
-
- ArrayRef<TemplateArgument> template_arguments() const {
- return {reinterpret_cast<const TemplateArgument *>(this + 1),
- DependentTemplateSpecializationTypeBits.NumArgs};
- }
-
- bool isSugared() const { return false; }
- QualType desugar() const { return QualType(this, 0); }
-
- void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context) {
- Profile(ID, Context, getKeyword(), Name, template_arguments());
- }
-
- static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context,
- ElaboratedTypeKeyword Keyword,
- const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args);
-
- static bool classof(const Type *T) {
- return T->getTypeClass() == DependentTemplateSpecialization;
- }
-};
-
/// Represents a pack expansion of types.
///
/// Pack expansions are part of C++11 variadic templates. A pack
diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h
index d52e104..38e8fba 100644
--- a/clang/include/clang/AST/TypeLoc.h
+++ b/clang/include/clang/AST/TypeLoc.h
@@ -2598,134 +2598,6 @@ public:
void initializeLocal(ASTContext &Context, SourceLocation Loc);
};
-struct DependentTemplateSpecializationLocInfo : DependentNameLocInfo {
- SourceLocation TemplateKWLoc;
- SourceLocation LAngleLoc;
- SourceLocation RAngleLoc;
- // followed by a TemplateArgumentLocInfo[]
-};
-
-class DependentTemplateSpecializationTypeLoc :
- public ConcreteTypeLoc<UnqualTypeLoc,
- DependentTemplateSpecializationTypeLoc,
- DependentTemplateSpecializationType,
- DependentTemplateSpecializationLocInfo> {
-public:
- SourceLocation getElaboratedKeywordLoc() const {
- return this->getLocalData()->ElaboratedKWLoc;
- }
-
- void setElaboratedKeywordLoc(SourceLocation Loc) {
- this->getLocalData()->ElaboratedKWLoc = Loc;
- }
-
- NestedNameSpecifierLoc getQualifierLoc() const {
- if (!getLocalData()->QualifierData)
- return NestedNameSpecifierLoc();
-
- return NestedNameSpecifierLoc(
- getTypePtr()->getDependentTemplateName().getQualifier(),
- getLocalData()->QualifierData);
- }
-
- void setQualifierLoc(NestedNameSpecifierLoc QualifierLoc) {
- if (!QualifierLoc) {
- // Even if we have a nested-name-specifier in the dependent
- // template specialization type, we won't record the nested-name-specifier
- // location information when this type-source location information is
- // part of a nested-name-specifier.
- getLocalData()->QualifierData = nullptr;
- return;
- }
-
- assert(QualifierLoc.getNestedNameSpecifier() ==
- getTypePtr()->getDependentTemplateName().getQualifier() &&
- "Inconsistent nested-name-specifier pointer");
- getLocalData()->QualifierData = QualifierLoc.getOpaqueData();
- }
-
- SourceLocation getTemplateKeywordLoc() const {
- return getLocalData()->TemplateKWLoc;
- }
-
- void setTemplateKeywordLoc(SourceLocation Loc) {
- getLocalData()->TemplateKWLoc = Loc;
- }
-
- SourceLocation getTemplateNameLoc() const {
- return this->getLocalData()->NameLoc;
- }
-
- void setTemplateNameLoc(SourceLocation Loc) {
- this->getLocalData()->NameLoc = Loc;
- }
-
- SourceLocation getLAngleLoc() const {
- return this->getLocalData()->LAngleLoc;
- }
-
- void setLAngleLoc(SourceLocation Loc) {
- this->getLocalData()->LAngleLoc = Loc;
- }
-
- SourceLocation getRAngleLoc() const {
- return this->getLocalData()->RAngleLoc;
- }
-
- void setRAngleLoc(SourceLocation Loc) {
- this->getLocalData()->RAngleLoc = Loc;
- }
-
- unsigned getNumArgs() const {
- return getTypePtr()->template_arguments().size();
- }
-
- void setArgLocInfo(unsigned i, TemplateArgumentLocInfo AI) {
- getArgInfos()[i] = AI;
- }
-
- TemplateArgumentLocInfo getArgLocInfo(unsigned i) const {
- return getArgInfos()[i];
- }
-
- TemplateArgumentLoc getArgLoc(unsigned i) const {
- return TemplateArgumentLoc(getTypePtr()->template_arguments()[i],
- getArgLocInfo(i));
- }
-
- SourceRange getLocalSourceRange() const {
- if (getElaboratedKeywordLoc().isValid())
- return SourceRange(getElaboratedKeywordLoc(), getRAngleLoc());
- else if (getQualifierLoc())
- return SourceRange(getQualifierLoc().getBeginLoc(), getRAngleLoc());
- else if (getTemplateKeywordLoc().isValid())
- return SourceRange(getTemplateKeywordLoc(), getRAngleLoc());
- else
- return SourceRange(getTemplateNameLoc(), getRAngleLoc());
- }
-
- void copy(DependentTemplateSpecializationTypeLoc Loc) {
- unsigned size = getFullDataSize();
- assert(size == Loc.getFullDataSize());
- memcpy(Data, Loc.Data, size);
- }
-
- void initializeLocal(ASTContext &Context, SourceLocation Loc);
-
- unsigned getExtraLocalDataSize() const {
- return getNumArgs() * sizeof(TemplateArgumentLocInfo);
- }
-
- unsigned getExtraLocalDataAlignment() const {
- return alignof(TemplateArgumentLocInfo);
- }
-
-private:
- TemplateArgumentLocInfo *getArgInfos() const {
- return static_cast<TemplateArgumentLocInfo*>(getExtraLocalData());
- }
-};
-
struct PackExpansionTypeLocInfo {
SourceLocation EllipsisLoc;
};
diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td
index 185a968..b3932a6 100644
--- a/clang/include/clang/AST/TypeProperties.td
+++ b/clang/include/clang/AST/TypeProperties.td
@@ -729,41 +729,6 @@ let Class = TemplateSpecializationType in {
}]>;
}
-let Class = DependentTemplateSpecializationType in {
- def : ReadHelper<[{
- const auto &dtn = node->getDependentTemplateName();
- auto name = dtn.getName();
- }]>;
-
- def : Property<"qualifier", NestedNameSpecifier> {
- let Read = [{ dtn.getQualifier() }];
- }
- def : Property<"identifier", Optional<Identifier>> {
- let Read = [{ makeOptionalFromPointer(name.getIdentifier()) }];
- }
- def : Property<"operatorKind", OverloadedOperatorKind> {
- let Conditional = [{ !identifier }];
- let Read = [{ name.getOperator() }];
- }
- def : Property<"HasTemplateKeyword", Bool> {
- let Read = [{ dtn.hasTemplateKeyword() }];
- }
-
- def : Property<"keyword", ElaboratedTypeKeyword> {
- let Read = [{ node->getKeyword() }];
- }
- def : Property<"templateArguments", Array<TemplateArgument>> {
- let Read = [{ node->template_arguments() }];
- }
-
- def : Creator<[{
- DependentTemplateStorage S(qualifier, identifier ? IdentifierOrOverloadedOperator(*identifier) :
- IdentifierOrOverloadedOperator(*operatorKind),
- HasTemplateKeyword);
- return ctx.getDependentTemplateSpecializationType(keyword, S, templateArguments);
- }]>;
-}
-
let Class = TemplateTypeParmType in {
def : Property<"depth", UInt32> {
let Read = [{ node->getDepth() }];
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index f1d88a9..492863d 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -7712,18 +7712,6 @@ AST_MATCHER_P(DecayedType, hasDecayedType, internal::Matcher<QualType>,
/// \endcode
extern const AstTypeMatcher<DependentNameType> dependentNameType;
-/// Matches a dependent template specialization type
-///
-/// Example matches A<T>::template B<T>
-/// \code
-/// template<typename T> struct A;
-/// template<typename T> struct declToImport {
-/// typename A<T>::template B<T> a;
-/// };
-/// \endcode
-extern const AstTypeMatcher<DependentTemplateSpecializationType>
- dependentTemplateSpecializationType;
-
/// Matches declarations whose declaration context, interpreted as a
/// Decl, matches \c InnerMatcher.
///
diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td
index fb6862b..db43a85 100644
--- a/clang/include/clang/Basic/TypeNodes.td
+++ b/clang/include/clang/Basic/TypeNodes.td
@@ -5,10 +5,11 @@ class TypeNode<TypeNode base, bit abstract = 0> : ASTNode {
bit Abstract = abstract;
}
-/// A type node that is only used to represent dependent types in C++. For
-/// example, DependentTemplateSpecializationType is used to represent types
-/// where the base template-id is dependent (such as `T::foo<U>`). Code
-/// that only works with non-dependent types can ignore these type nodes.
+/// A type node that is only used to represent dependent types in C++.
+/// For example, DependentSizedArrayType is used to represent types where the
+/// size expression is dependent (such as `T[V]`, where V is a constant template
+/// parameter). Code that only works with non-dependent types can ignore these
+/// type nodes.
class AlwaysDependent {}
/// A type node that is never used to represent a canonical type, which is to
@@ -96,7 +97,6 @@ def DeducedType : TypeNode<Type, 1>;
def AutoType : TypeNode<DeducedType>;
def DeducedTemplateSpecializationType : TypeNode<DeducedType>;
def DependentNameType : TypeNode<Type>, AlwaysDependent;
-def DependentTemplateSpecializationType : TypeNode<Type>, AlwaysDependent;
def PackExpansionType : TypeNode<Type>, AlwaysDependent;
def PackIndexingType : TypeNode<Type>, NeverCanonicalUnlessDependent;
def ObjCTypeParamType : TypeNode<Type>, NeverCanonical;
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index a4eb92e..5f6a6ea 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -156,16 +156,10 @@ let SMETargetGuard = "sme2p1" in {
////////////////////////////////////////////////////////////////////////////////
// SME - Counting elements in a streaming vector
-multiclass ZACount<string n_suffix> {
- def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone,
- "aarch64_sme_" # n_suffix,
- [IsOverloadNone, IsStreamingCompatible]>;
-}
-
-defm SVCNTSB : ZACount<"cntsb">;
-defm SVCNTSH : ZACount<"cntsh">;
-defm SVCNTSW : ZACount<"cntsw">;
-defm SVCNTSD : ZACount<"cntsd">;
+def SVCNTSB : SInst<"svcntsb", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSH : SInst<"svcntsh", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSW : SInst<"svcntsw", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSD : SInst<"svcntsd", "nv", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible]>;
////////////////////////////////////////////////////////////////////////////////
// SME - ADDHA/ADDVA
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 5dfdb23..03cff5f 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3558,6 +3558,73 @@ struct FormatStyle {
/// \version 9
std::vector<std::string> NamespaceMacros;
+ /// Control over each component in a numeric literal.
+ enum NumericLiteralComponentStyle : int8_t {
+ /// Leave this component of the literal as is.
+ NLCS_Leave,
+ /// Format this component with uppercase characters.
+ NLCS_Upper,
+ /// Format this component with lowercase characters.
+ NLCS_Lower,
+ };
+
+ /// Separate control for each numeric literal component.
+ ///
+ /// For example, the config below will leave exponent letters alone, reformat
+ /// hexadecimal digits in lowercase, reformat numeric literal prefixes in
+ /// uppercase, and reformat suffixes in lowercase.
+ /// \code
+ /// NumericLiteralCase:
+ /// ExponentLetter: Leave
+ /// HexDigit: Lower
+ /// Prefix: Upper
+ /// Suffix: Lower
+ /// \endcode
+ struct NumericLiteralCaseStyle {
+ /// Format floating point exponent separator letter case.
+ /// \code
+ /// float a = 6.02e23 + 1.0E10; // Leave
+ /// float a = 6.02E23 + 1.0E10; // Upper
+ /// float a = 6.02e23 + 1.0e10; // Lower
+ /// \endcode
+ NumericLiteralComponentStyle ExponentLetter;
+ /// Format hexadecimal digit case.
+ /// \code
+ /// a = 0xaBcDeF; // Leave
+ /// a = 0xABCDEF; // Upper
+ /// a = 0xabcdef; // Lower
+ /// \endcode
+ NumericLiteralComponentStyle HexDigit;
+ /// Format integer prefix case.
+ /// \code
+ /// a = 0XF0 | 0b1; // Leave
+ /// a = 0XF0 | 0B1; // Upper
+ /// a = 0xF0 | 0b1; // Lower
+ /// \endcode
+ NumericLiteralComponentStyle Prefix;
+ /// Format suffix case. This option excludes case-sensitive reserved
+ /// suffixes, such as ``min`` in C++.
+ /// \code
+ /// a = 1uLL; // Leave
+ /// a = 1ULL; // Upper
+ /// a = 1ull; // Lower
+ /// \endcode
+ NumericLiteralComponentStyle Suffix;
+
+ bool operator==(const NumericLiteralCaseStyle &R) const {
+ return ExponentLetter == R.ExponentLetter && HexDigit == R.HexDigit &&
+ Prefix == R.Prefix && Suffix == R.Suffix;
+ }
+
+ bool operator!=(const NumericLiteralCaseStyle &R) const {
+ return !(*this == R);
+ }
+ };
+
+ /// Capitalization style for numeric literals.
+ /// \version 22
+ NumericLiteralCaseStyle NumericLiteralCase;
+
/// Controls bin-packing Objective-C protocol conformance list
/// items into as few lines as possible when they go over ``ColumnLimit``.
///
@@ -5469,6 +5536,7 @@ struct FormatStyle {
MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
NamespaceIndentation == R.NamespaceIndentation &&
NamespaceMacros == R.NamespaceMacros &&
+ NumericLiteralCase == R.NumericLiteralCase &&
ObjCBinPackProtocolList == R.ObjCBinPackProtocolList &&
ObjCBlockIndentWidth == R.ObjCBlockIndentWidth &&
ObjCBreakBeforeNestedBlockParam ==
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 61af7bf..fcc270a 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -135,11 +135,13 @@ public:
std::string OrcRuntimePath = "";
/// PID of the out-of-process JIT executor.
uint32_t ExecutorPID = 0;
+ /// An optional code model to provide to the JITTargetMachineBuilder
+ std::optional<llvm::CodeModel::Model> CM = std::nullopt;
JITConfig()
: IsOutOfProcess(false), OOPExecutor(""), OOPExecutorConnect(""),
UseSharedMemory(false), SlabAllocateSize(0), OrcRuntimePath(""),
- ExecutorPID(0) {}
+ ExecutorPID(0), CM(std::nullopt) {}
};
protected:
diff --git a/clang/include/clang/Sema/HeuristicResolver.h b/clang/include/clang/Sema/HeuristicResolver.h
index 71588be..9a220ba 100644
--- a/clang/include/clang/Sema/HeuristicResolver.h
+++ b/clang/include/clang/Sema/HeuristicResolver.h
@@ -62,7 +62,7 @@ public:
std::vector<const NamedDecl *>
resolveDependentNameType(const DependentNameType *DNT) const;
std::vector<const NamedDecl *> resolveTemplateSpecializationType(
- const DependentTemplateSpecializationType *DTST) const;
+ const TemplateSpecializationType *TST) const;
// Try to heuristically resolve a dependent nested name specifier
// to the type it likely denotes. Note that *dependent* name specifiers always
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a7600ab..7e00085 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11399,10 +11399,6 @@ public:
SourceLocation NameLoc,
IdentifierInfo *&II);
- bool resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name,
- SourceLocation NameLoc,
- bool Diagnose = true);
-
/// Determine whether a particular identifier might be the name in a C++1z
/// deduction-guide declaration.
bool isDeductionGuideName(Scope *S, const IdentifierInfo &Name,
@@ -11643,7 +11639,8 @@ public:
QualType CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
TemplateName Template,
SourceLocation TemplateLoc,
- TemplateArgumentListInfo &TemplateArgs);
+ TemplateArgumentListInfo &TemplateArgs,
+ Scope *Scope, bool ForNestedNameSpecifier);
TypeResult
ActOnTemplateIdType(Scope *S, ElaboratedTypeKeyword ElaboratedKeyword,
diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def
index bea1525..d6c4845 100644
--- a/clang/include/clang/Serialization/TypeBitCodes.def
+++ b/clang/include/clang/Serialization/TypeBitCodes.def
@@ -39,7 +39,6 @@ TYPE_BIT_CODE(ObjCObject, OBJC_OBJECT, 28)
TYPE_BIT_CODE(TemplateTypeParm, TEMPLATE_TYPE_PARM, 29)
TYPE_BIT_CODE(TemplateSpecialization, TEMPLATE_SPECIALIZATION, 30)
TYPE_BIT_CODE(DependentName, DEPENDENT_NAME, 31)
-TYPE_BIT_CODE(DependentTemplateSpecialization, DEPENDENT_TEMPLATE_SPECIALIZATION, 32)
TYPE_BIT_CODE(DependentSizedArray, DEPENDENT_SIZED_ARRAY, 33)
TYPE_BIT_CODE(Paren, PAREN, 34)
TYPE_BIT_CODE(PackExpansion, PACK_EXPANSION, 35)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index ed4c6b0..5240054 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -4286,7 +4286,6 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const {
case Type::DependentName:
case Type::InjectedClassName:
case Type::TemplateSpecialization:
- case Type::DependentTemplateSpecialization:
case Type::TemplateTypeParm:
case Type::SubstTemplateTypeParmPack:
case Type::SubstBuiltinTemplatePack:
@@ -5932,6 +5931,30 @@ QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index,
return QualType(TypeParm, 0);
}
+static ElaboratedTypeKeyword
+getCanonicalElaboratedTypeKeyword(ElaboratedTypeKeyword Keyword) {
+ switch (Keyword) {
+ // These are just themselves.
+ case ElaboratedTypeKeyword::None:
+ case ElaboratedTypeKeyword::Struct:
+ case ElaboratedTypeKeyword::Union:
+ case ElaboratedTypeKeyword::Enum:
+ case ElaboratedTypeKeyword::Interface:
+ return Keyword;
+
+ // These are equivalent.
+ case ElaboratedTypeKeyword::Typename:
+ return ElaboratedTypeKeyword::None;
+
+ // These are functionally equivalent, so relying on their equivalence is
+ // IFNDR. By making them equivalent, we disallow overloading, which at least
+ // can produce a diagnostic.
+ case ElaboratedTypeKeyword::Class:
+ return ElaboratedTypeKeyword::Struct;
+ }
+ llvm_unreachable("unexpected keyword kind");
+}
+
TypeSourceInfo *ASTContext::getTemplateSpecializationTypeInfo(
ElaboratedTypeKeyword Keyword, SourceLocation ElaboratedKeywordLoc,
NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKeywordLoc,
@@ -5970,17 +5993,20 @@ hasAnyPackExpansions(ArrayRef<TemplateArgument> Args) {
}
QualType ASTContext::getCanonicalTemplateSpecializationType(
- TemplateName Template, ArrayRef<TemplateArgument> Args) const {
+ ElaboratedTypeKeyword Keyword, TemplateName Template,
+ ArrayRef<TemplateArgument> Args) const {
assert(Template ==
getCanonicalTemplateName(Template, /*IgnoreDeduced=*/true));
- assert(!Args.empty());
+ assert((Keyword == ElaboratedTypeKeyword::None ||
+ Template.getAsDependentTemplateName()));
#ifndef NDEBUG
for (const auto &Arg : Args)
assert(Arg.structurallyEquals(getCanonicalTemplateArgument(Arg)));
#endif
llvm::FoldingSetNodeID ID;
- TemplateSpecializationType::Profile(ID, Template, Args, QualType(), *this);
+ TemplateSpecializationType::Profile(ID, Keyword, Template, Args, QualType(),
+ *this);
void *InsertPos = nullptr;
if (auto *T = TemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(T, 0);
@@ -5988,9 +6014,9 @@ QualType ASTContext::getCanonicalTemplateSpecializationType(
void *Mem = Allocate(sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * Args.size(),
alignof(TemplateSpecializationType));
- auto *Spec = new (Mem)
- TemplateSpecializationType(ElaboratedTypeKeyword::None, Template,
- /*IsAlias=*/false, Args, QualType());
+ auto *Spec =
+ new (Mem) TemplateSpecializationType(Keyword, Template,
+ /*IsAlias=*/false, Args, QualType());
assert(Spec->isDependentType() &&
"canonical template specialization must be dependent");
Types.push_back(Spec);
@@ -6002,16 +6028,16 @@ QualType ASTContext::getTemplateSpecializationType(
ElaboratedTypeKeyword Keyword, TemplateName Template,
ArrayRef<TemplateArgument> SpecifiedArgs,
ArrayRef<TemplateArgument> CanonicalArgs, QualType Underlying) const {
- assert(!Template.getUnderlying().getAsDependentTemplateName() &&
- "No dependent template names here!");
-
const auto *TD = Template.getAsTemplateDecl(/*IgnoreDeduced=*/true);
bool IsTypeAlias = TD && TD->isTypeAlias();
if (Underlying.isNull()) {
TemplateName CanonTemplate =
getCanonicalTemplateName(Template, /*IgnoreDeduced=*/true);
- bool NonCanonical =
- Template != CanonTemplate || Keyword != ElaboratedTypeKeyword::None;
+ ElaboratedTypeKeyword CanonKeyword =
+ CanonTemplate.getAsDependentTemplateName()
+ ? getCanonicalElaboratedTypeKeyword(Keyword)
+ : ElaboratedTypeKeyword::None;
+ bool NonCanonical = Template != CanonTemplate || Keyword != CanonKeyword;
SmallVector<TemplateArgument, 4> CanonArgsVec;
if (CanonicalArgs.empty()) {
CanonArgsVec = SmallVector<TemplateArgument, 4>(SpecifiedArgs);
@@ -6033,8 +6059,8 @@ QualType ASTContext::getTemplateSpecializationType(
"Caller must compute aliased type");
IsTypeAlias = false;
- Underlying =
- getCanonicalTemplateSpecializationType(CanonTemplate, CanonicalArgs);
+ Underlying = getCanonicalTemplateSpecializationType(
+ CanonKeyword, CanonTemplate, CanonicalArgs);
if (!NonCanonical)
return Underlying;
}
@@ -6085,30 +6111,6 @@ ASTContext::getMacroQualifiedType(QualType UnderlyingTy,
return QualType(newType, 0);
}
-static ElaboratedTypeKeyword
-getCanonicalElaboratedTypeKeyword(ElaboratedTypeKeyword Keyword) {
- switch (Keyword) {
- // These are just themselves.
- case ElaboratedTypeKeyword::None:
- case ElaboratedTypeKeyword::Struct:
- case ElaboratedTypeKeyword::Union:
- case ElaboratedTypeKeyword::Enum:
- case ElaboratedTypeKeyword::Interface:
- return Keyword;
-
- // These are equivalent.
- case ElaboratedTypeKeyword::Typename:
- return ElaboratedTypeKeyword::None;
-
- // These are functionally equivalent, so relying on their equivalence is
- // IFNDR. By making them equivalent, we disallow overloading, which at least
- // can produce a diagnostic.
- case ElaboratedTypeKeyword::Class:
- return ElaboratedTypeKeyword::Struct;
- }
- llvm_unreachable("unexpected keyword kind");
-}
-
QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier NNS,
const IdentifierInfo *Name) const {
@@ -6140,68 +6142,6 @@ QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword,
return QualType(T, 0);
}
-QualType ASTContext::getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgumentLoc> Args) const {
- // TODO: avoid this copy
- SmallVector<TemplateArgument, 16> ArgCopy;
- for (unsigned I = 0, E = Args.size(); I != E; ++I)
- ArgCopy.push_back(Args[I].getArgument());
- return getDependentTemplateSpecializationType(Keyword, Name, ArgCopy);
-}
-
-QualType ASTContext::getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args, bool IsCanonical) const {
- llvm::FoldingSetNodeID ID;
- DependentTemplateSpecializationType::Profile(ID, *this, Keyword, Name, Args);
-
- if (auto const T_iter = DependentTemplateSpecializationTypes.find(ID);
- T_iter != DependentTemplateSpecializationTypes.end())
- return QualType(T_iter->getSecond(), 0);
-
- NestedNameSpecifier NNS = Name.getQualifier();
-
- QualType Canon;
- if (!IsCanonical) {
- ElaboratedTypeKeyword CanonKeyword =
- getCanonicalElaboratedTypeKeyword(Keyword);
- NestedNameSpecifier CanonNNS = NNS.getCanonical();
- bool AnyNonCanonArgs = false;
- auto CanonArgs =
- ::getCanonicalTemplateArguments(*this, Args, AnyNonCanonArgs);
-
- if (CanonKeyword != Keyword || AnyNonCanonArgs || CanonNNS != NNS ||
- !Name.hasTemplateKeyword()) {
- Canon = getDependentTemplateSpecializationType(
- CanonKeyword, {CanonNNS, Name.getName(), /*HasTemplateKeyword=*/true},
- CanonArgs,
- /*IsCanonical=*/true);
- }
- } else {
- assert(Keyword == getCanonicalElaboratedTypeKeyword(Keyword));
- assert(Name.hasTemplateKeyword());
- assert(NNS.isCanonical());
-#ifndef NDEBUG
- for (const auto &Arg : Args)
- assert(Arg.structurallyEquals(getCanonicalTemplateArgument(Arg)));
-#endif
- }
- void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) +
- sizeof(TemplateArgument) * Args.size()),
- alignof(DependentTemplateSpecializationType));
- auto *T =
- new (Mem) DependentTemplateSpecializationType(Keyword, Name, Args, Canon);
-#ifndef NDEBUG
- llvm::FoldingSetNodeID InsertedID;
- T->Profile(InsertedID, *this);
- assert(InsertedID == ID && "ID does not match");
-#endif
- Types.push_back(T);
- DependentTemplateSpecializationTypes.try_emplace(ID, T);
- return QualType(T, 0);
-}
-
TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) const {
TemplateArgument Arg;
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
@@ -14327,21 +14267,6 @@ static QualType getCommonNonSugarTypeNode(const ASTContext &Ctx, const Type *X,
getCommonTypeKeyword(NX, NY, /*IsSame=*/true),
getCommonQualifier(Ctx, NX, NY, /*IsSame=*/true), NX->getIdentifier());
}
- case Type::DependentTemplateSpecialization: {
- const auto *TX = cast<DependentTemplateSpecializationType>(X),
- *TY = cast<DependentTemplateSpecializationType>(Y);
- auto As = getCommonTemplateArguments(Ctx, TX->template_arguments(),
- TY->template_arguments());
- const DependentTemplateStorage &SX = TX->getDependentTemplateName(),
- &SY = TY->getDependentTemplateName();
- assert(SX.getName() == SY.getName());
- DependentTemplateStorage Name(
- getCommonNNS(Ctx, SX.getQualifier(), SY.getQualifier(),
- /*IsSame=*/true),
- SX.getName(), SX.hasTemplateKeyword() || SY.hasTemplateKeyword());
- return Ctx.getDependentTemplateSpecializationType(
- getCommonTypeKeyword(TX, TY, /*IsSame=*/true), Name, As);
- }
case Type::UnaryTransform: {
const auto *TX = cast<UnaryTransformType>(X),
*TY = cast<UnaryTransformType>(Y);
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index fe7f1e5e..1c8fd83 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -1745,15 +1745,13 @@ ExpectedType ASTNodeImporter::VisitTagType(const TagType *T) {
if (!ToDeclOrErr)
return ToDeclOrErr.takeError();
- if (DeclForType->isUsed()) {
- // If there is a definition of the 'OriginalDecl', it should be imported to
- // have all information for the type in the "To" AST. (In some cases no
- // other reference may exist to the definition decl and it would not be
- // imported otherwise.)
- Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
- if (!ToDefDeclOrErr)
- return ToDefDeclOrErr.takeError();
- }
+ // If there is a definition of the 'OriginalDecl', it should be imported to
+ // have all information for the type in the "To" AST. (In some cases no
+ // other reference may exist to the definition decl and it would not be
+ // imported otherwise.)
+ Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
+ if (!ToDefDeclOrErr)
+ return ToDefDeclOrErr.takeError();
if (T->isCanonicalUnqualified())
return Importer.getToContext().getCanonicalTagType(*ToDeclOrErr);
@@ -1892,25 +1890,6 @@ ASTNodeImporter::VisitPackExpansionType(const PackExpansionType *T) {
/*ExpactPack=*/false);
}
-ExpectedType ASTNodeImporter::VisitDependentTemplateSpecializationType(
- const DependentTemplateSpecializationType *T) {
- const DependentTemplateStorage &DTN = T->getDependentTemplateName();
- auto QualifierOrErr = import(DTN.getQualifier());
- if (!QualifierOrErr)
- return QualifierOrErr.takeError();
-
- SmallVector<TemplateArgument, 2> ToPack;
- ToPack.reserve(T->template_arguments().size());
- if (Error Err = ImportTemplateArguments(T->template_arguments(), ToPack))
- return std::move(Err);
-
- return Importer.getToContext().getDependentTemplateSpecializationType(
- T->getKeyword(),
- {*QualifierOrErr, Importer.Import(DTN.getName()),
- DTN.hasTemplateKeyword()},
- ToPack);
-}
-
ExpectedType
ASTNodeImporter::VisitDependentNameType(const DependentNameType *T) {
auto ToQualifierOrErr = import(T->getQualifier());
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index 1292c30..1557346 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1384,20 +1384,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
break;
}
- case Type::DependentTemplateSpecialization: {
- const auto *Spec1 = cast<DependentTemplateSpecializationType>(T1);
- const auto *Spec2 = cast<DependentTemplateSpecializationType>(T2);
- if (Spec1->getKeyword() != Spec2->getKeyword())
- return false;
- if (!IsStructurallyEquivalent(Context, Spec1->getDependentTemplateName(),
- Spec2->getDependentTemplateName()))
- return false;
- if (!IsStructurallyEquivalent(Context, Spec1->template_arguments(),
- Spec2->template_arguments()))
- return false;
- break;
- }
-
case Type::PackExpansion:
if (!IsStructurallyEquivalent(Context,
cast<PackExpansionType>(T1)->getPattern(),
diff --git a/clang/lib/AST/ASTTypeTraits.cpp b/clang/lib/AST/ASTTypeTraits.cpp
index d2f7fdb..84eb777 100644
--- a/clang/lib/AST/ASTTypeTraits.cpp
+++ b/clang/lib/AST/ASTTypeTraits.cpp
@@ -249,10 +249,6 @@ SourceRange DynTypedNode::getSourceRange(bool IncludeQualifier) const {
auto T = TL->castAs<TemplateSpecializationTypeLoc>();
return SourceRange(T.getTemplateNameLoc(), T.getEndLoc());
}
- case TypeLoc::DependentTemplateSpecialization: {
- auto T = TL->castAs<DependentTemplateSpecializationTypeLoc>();
- return SourceRange(T.getTemplateNameLoc(), T.getEndLoc());
- }
case TypeLoc::Enum:
case TypeLoc::Record:
case TypeLoc::InjectedClassName:
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 3162857..b6bb611 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -663,6 +663,7 @@ CanQualType ClassTemplateDecl::getCanonicalInjectedSpecializationType(
Ctx.canonicalizeTemplateArguments(CanonicalArgs);
CommonPtr->CanonInjectedTST =
CanQualType::CreateUnsafe(Ctx.getCanonicalTemplateSpecializationType(
+ ElaboratedTypeKeyword::None,
TemplateName(const_cast<ClassTemplateDecl *>(getCanonicalDecl())),
CanonicalArgs));
}
@@ -1209,6 +1210,7 @@ ClassTemplatePartialSpecializationDecl::getCanonicalInjectedSpecializationType(
if (CanonInjectedTST.isNull()) {
CanonInjectedTST =
CanQualType::CreateUnsafe(Ctx.getCanonicalTemplateSpecializationType(
+ ElaboratedTypeKeyword::None,
TemplateName(getSpecializedTemplate()->getCanonicalDecl()),
getTemplateArgs().asArray()));
}
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 163cd43..2173aed 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -1311,19 +1311,6 @@ void CXXNameMangler::manglePrefix(QualType type) {
mangleTemplateArgs(TST->getTemplateName(), TST->template_arguments());
addSubstitution(QualType(TST, 0));
}
- } else if (const auto *DTST =
- type->getAs<DependentTemplateSpecializationType>()) {
- if (!mangleSubstitution(QualType(DTST, 0))) {
- TemplateName Template = getASTContext().getDependentTemplateName(
- DTST->getDependentTemplateName());
- mangleTemplatePrefix(Template);
-
- // FIXME: GCC does not appear to mangle the template arguments when
- // the template in question is a dependent template name. Should we
- // emulate that badness?
- mangleTemplateArgs(Template, DTST->template_arguments());
- addSubstitution(QualType(DTST, 0));
- }
} else if (const auto *DNT = type->getAs<DependentNameType>()) {
// Clang 14 and before did not consider this substitutable.
bool Clang14Compat = isCompatibleWith(LangOptions::ClangABI::Ver14);
@@ -2525,10 +2512,14 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
mangleSourceNameWithAbiTags(TD);
break;
}
+ case TemplateName::DependentTemplate: {
+ const DependentTemplateStorage *S = TN.getAsDependentTemplateName();
+ mangleSourceName(S->getName().getIdentifier());
+ break;
+ }
case TemplateName::OverloadedTemplate:
case TemplateName::AssumedTemplate:
- case TemplateName::DependentTemplate:
case TemplateName::DeducedTemplate:
llvm_unreachable("invalid base for a template specialization type");
@@ -2574,17 +2565,6 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
mangleSourceName(cast<DependentNameType>(Ty)->getIdentifier());
break;
- case Type::DependentTemplateSpecialization: {
- const DependentTemplateSpecializationType *DTST =
- cast<DependentTemplateSpecializationType>(Ty);
- TemplateName Template = getASTContext().getDependentTemplateName(
- DTST->getDependentTemplateName());
- const DependentTemplateStorage &S = DTST->getDependentTemplateName();
- mangleSourceName(S.getName().getIdentifier());
- mangleTemplateArgs(Template, DTST->template_arguments());
- break;
- }
-
case Type::Using:
return mangleUnresolvedTypeOrSimpleId(cast<UsingType>(Ty)->desugar(),
Prefix);
@@ -4458,16 +4438,14 @@ void CXXNameMangler::mangleType(const TemplateSpecializationType *T) {
if (TemplateDecl *TD = T->getTemplateName().getAsTemplateDecl()) {
mangleTemplateName(TD, T->template_arguments());
} else {
- if (mangleSubstitution(QualType(T, 0)))
- return;
-
+ Out << 'N';
mangleTemplatePrefix(T->getTemplateName());
// FIXME: GCC does not appear to mangle the template arguments when
// the template in question is a dependent template name. Should we
// emulate that badness?
mangleTemplateArgs(T->getTemplateName(), T->template_arguments());
- addSubstitution(QualType(T, 0));
+ Out << 'E';
}
}
@@ -4505,21 +4483,6 @@ void CXXNameMangler::mangleType(const DependentNameType *T) {
Out << 'E';
}
-void CXXNameMangler::mangleType(const DependentTemplateSpecializationType *T) {
- // Dependently-scoped template types are nested if they have a prefix.
- Out << 'N';
-
- TemplateName Prefix =
- getASTContext().getDependentTemplateName(T->getDependentTemplateName());
- mangleTemplatePrefix(Prefix);
-
- // FIXME: GCC does not appear to mangle the template arguments when
- // the template in question is a dependent template name. Should we
- // emulate that badness?
- mangleTemplateArgs(Prefix, T->template_arguments());
- Out << 'E';
-}
-
void CXXNameMangler::mangleType(const TypeOfType *T) {
// FIXME: this is pretty unsatisfactory, but there isn't an obvious
// "extension with parameters" mangling.
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index d96472e..8cbc72b 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3655,12 +3655,6 @@ void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T, Qualifiers,
Error(Range.getBegin(), "dependent name type") << Range;
}
-void MicrosoftCXXNameMangler::mangleType(
- const DependentTemplateSpecializationType *T, Qualifiers,
- SourceRange Range) {
- Error(Range.getBegin(), "dependent template specialization type") << Range;
-}
-
void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T, Qualifiers,
SourceRange Range) {
Error(Range.getBegin(), "pack expansion") << Range;
diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp
index fb95f58..6842038 100644
--- a/clang/lib/AST/ODRHash.cpp
+++ b/clang/lib/AST/ODRHash.cpp
@@ -1213,16 +1213,6 @@ public:
VisitTypeWithKeyword(T);
}
- void VisitDependentTemplateSpecializationType(
- const DependentTemplateSpecializationType *T) {
- Hash.AddDependentTemplateName(T->getDependentTemplateName());
- ID.AddInteger(T->template_arguments().size());
- for (const auto &TA : T->template_arguments()) {
- Hash.AddTemplateArgument(TA);
- }
- VisitTypeWithKeyword(T);
- }
-
void VisitUnaryTransformType(const UnaryTransformType *T) {
AddQualType(T->getUnderlyingType());
AddQualType(T->getBaseType());
diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp
index f2cb15d..2b8044e 100644
--- a/clang/lib/AST/TemplateName.cpp
+++ b/clang/lib/AST/TemplateName.cpp
@@ -213,25 +213,25 @@ TemplateDecl *TemplateName::getAsTemplateDecl(bool IgnoreDeduced) const {
dyn_cast_if_present<Decl *>(Name.Storage));
}
-std::pair<TemplateDecl *, DefaultArguments>
+std::pair<TemplateName, DefaultArguments>
TemplateName::getTemplateDeclAndDefaultArgs() const {
+ DefaultArguments DefArgs;
for (TemplateName Name = *this; /**/; /**/) {
- if (Name.getKind() == TemplateName::DeducedTemplate) {
- DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName();
- TemplateDecl *TD =
- DTS->getUnderlying().getAsTemplateDecl(/*IgnoreDeduced=*/true);
- DefaultArguments DefArgs = DTS->getDefaultArguments();
- if (TD && DefArgs)
+ if (DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName()) {
+ assert(!DefArgs && "multiple default args?");
+ DefArgs = DTS->getDefaultArguments();
+ if (TemplateDecl *TD = DTS->getUnderlying().getAsTemplateDecl();
+ TD && DefArgs)
assert(DefArgs.StartPos + DefArgs.Args.size() <=
TD->getTemplateParameters()->size());
- return {TD, DTS->getDefaultArguments()};
+ Name = DTS->getUnderlying();
}
if (std::optional<TemplateName> UnderlyingOrNone =
Name.desugar(/*IgnoreDeduced=*/false)) {
Name = *UnderlyingOrNone;
continue;
}
- return {cast_if_present<TemplateDecl>(Name.Storage.dyn_cast<Decl *>()), {}};
+ return {Name, DefArgs};
}
}
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 8662179..9794314 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -1933,10 +1933,6 @@ NestedNameSpecifier Type::getPrefix() const {
return cast<TemplateSpecializationType>(this)
->getTemplateName()
.getQualifier();
- case Type::DependentTemplateSpecialization:
- return cast<DependentTemplateSpecializationType>(this)
- ->getDependentTemplateName()
- .getQualifier();
case Type::Enum:
case Type::Record:
case Type::InjectedClassName:
@@ -3215,7 +3211,6 @@ bool Type::isSpecifierType() const {
case SubstTemplateTypeParm:
case TemplateSpecialization:
case DependentName:
- case DependentTemplateSpecialization:
case ObjCInterface:
case ObjCObject:
return true;
@@ -3333,42 +3328,12 @@ StringRef KeywordHelpers::getKeywordName(ElaboratedTypeKeyword Keyword) {
llvm_unreachable("Unknown elaborated type keyword.");
}
-DependentTemplateSpecializationType::DependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args, QualType Canon)
- : TypeWithKeyword(Keyword, DependentTemplateSpecialization, Canon,
-
- toTypeDependence(Name.getDependence())),
- Name(Name) {
- DependentTemplateSpecializationTypeBits.NumArgs = Args.size();
- auto *ArgBuffer = const_cast<TemplateArgument *>(template_arguments().data());
- for (const TemplateArgument &Arg : Args) {
- addDependence(toTypeDependence(Arg.getDependence() &
- TemplateArgumentDependence::UnexpandedPack));
-
- new (ArgBuffer++) TemplateArgument(Arg);
- }
-}
-
-void DependentTemplateSpecializationType::Profile(
- llvm::FoldingSetNodeID &ID, const ASTContext &Context,
- ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name,
- ArrayRef<TemplateArgument> Args) {
- ID.AddInteger(llvm::to_underlying(Keyword));
- Name.Profile(ID);
- for (const TemplateArgument &Arg : Args)
- Arg.Profile(ID, Context);
-}
-
bool Type::isElaboratedTypeSpecifier() const {
ElaboratedTypeKeyword Keyword;
if (const auto *TST = dyn_cast<TemplateSpecializationType>(this))
Keyword = TST->getKeyword();
else if (const auto *DepName = dyn_cast<DependentNameType>(this))
Keyword = DepName->getKeyword();
- else if (const auto *DepTST =
- dyn_cast<DependentTemplateSpecializationType>(this))
- Keyword = DepTST->getKeyword();
else if (const auto *T = dyn_cast<TagType>(this))
Keyword = T->getKeyword();
else if (const auto *T = dyn_cast<TypedefType>(this))
@@ -4641,17 +4606,6 @@ TemplateSpecializationType::TemplateSpecializationType(
TemplateSpecializationTypeBits.NumArgs = Args.size();
TemplateSpecializationTypeBits.TypeAlias = IsAlias;
- assert(!T.getAsDependentTemplateName() &&
- "Use DependentTemplateSpecializationType for dependent template-name");
- assert((T.getKind() == TemplateName::Template ||
- T.getKind() == TemplateName::SubstTemplateTemplateParm ||
- T.getKind() == TemplateName::SubstTemplateTemplateParmPack ||
- T.getKind() == TemplateName::UsingTemplate ||
- T.getKind() == TemplateName::QualifiedTemplate ||
- T.getKind() == TemplateName::DeducedTemplate ||
- T.getKind() == TemplateName::AssumedTemplate) &&
- "Unexpected template name for TemplateSpecializationType");
-
auto *TemplateArgs =
const_cast<TemplateArgument *>(template_arguments().data());
for (const TemplateArgument &Arg : Args) {
@@ -4690,15 +4644,17 @@ bool clang::TemplateSpecializationType::isSugared() const {
void TemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID,
const ASTContext &Ctx) {
- Profile(ID, Template, template_arguments(),
+ Profile(ID, getKeyword(), Template, template_arguments(),
isSugared() ? desugar() : QualType(), Ctx);
}
void TemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID,
+ ElaboratedTypeKeyword Keyword,
TemplateName T,
ArrayRef<TemplateArgument> Args,
QualType Underlying,
const ASTContext &Context) {
+ ID.AddInteger(llvm::to_underlying(Keyword));
T.Profile(ID);
Underlying.Profile(ID);
@@ -5105,7 +5061,6 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const {
case Type::SubstTemplateTypeParmPack:
case Type::SubstBuiltinTemplatePack:
case Type::DependentName:
- case Type::DependentTemplateSpecialization:
case Type::Auto:
return ResultIfUnknown;
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
index 3e9597f..55476e2 100644
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -477,8 +477,6 @@ NestedNameSpecifierLoc TypeLoc::getPrefix() const {
return castAs<DependentNameTypeLoc>().getQualifierLoc();
case TypeLoc::TemplateSpecialization:
return castAs<TemplateSpecializationTypeLoc>().getQualifierLoc();
- case TypeLoc::DependentTemplateSpecialization:
- return castAs<DependentTemplateSpecializationTypeLoc>().getQualifierLoc();
case TypeLoc::DeducedTemplateSpecialization:
return castAs<DeducedTemplateSpecializationTypeLoc>().getQualifierLoc();
case TypeLoc::Enum:
@@ -505,13 +503,6 @@ SourceLocation TypeLoc::getNonPrefixBeginLoc() const {
Loc = TL.getTemplateNameLoc();
return Loc;
}
- case TypeLoc::DependentTemplateSpecialization: {
- auto TL = castAs<DependentTemplateSpecializationTypeLoc>();
- SourceLocation Loc = TL.getTemplateKeywordLoc();
- if (!Loc.isValid())
- Loc = TL.getTemplateNameLoc();
- return Loc;
- }
case TypeLoc::DeducedTemplateSpecialization: {
auto TL = castAs<DeducedTemplateSpecializationTypeLoc>();
SourceLocation Loc = TL.getTemplateKeywordLoc();
@@ -550,12 +541,6 @@ SourceLocation TypeLoc::getNonElaboratedBeginLoc() const {
return QualifierLoc.getBeginLoc();
return T.getTemplateNameLoc();
}
- case TypeLoc::DependentTemplateSpecialization: {
- auto T = castAs<DependentTemplateSpecializationTypeLoc>();
- if (NestedNameSpecifierLoc QualifierLoc = T.getQualifierLoc())
- return QualifierLoc.getBeginLoc();
- return T.getTemplateNameLoc();
- }
case TypeLoc::DeducedTemplateSpecialization: {
auto T = castAs<DeducedTemplateSpecializationTypeLoc>();
if (NestedNameSpecifierLoc QualifierLoc = T.getQualifierLoc())
@@ -690,20 +675,6 @@ void DependentNameTypeLoc::initializeLocal(ASTContext &Context,
setNameLoc(Loc);
}
-void
-DependentTemplateSpecializationTypeLoc::initializeLocal(ASTContext &Context,
- SourceLocation Loc) {
- initializeElaboratedKeyword(*this, Loc);
- setQualifierLoc(initializeQualifier(
- Context, getTypePtr()->getDependentTemplateName().getQualifier(), Loc));
- setTemplateKeywordLoc(Loc);
- setTemplateNameLoc(Loc);
- setLAngleLoc(Loc);
- setRAngleLoc(Loc);
- TemplateSpecializationTypeLoc::initializeArgLocs(
- Context, getTypePtr()->template_arguments(), getArgInfos(), Loc);
-}
-
void TemplateSpecializationTypeLoc::set(SourceLocation ElaboratedKeywordLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKeywordLoc,
@@ -949,8 +920,5 @@ AutoTypeLoc TypeLoc::getContainedAutoTypeLoc() const {
SourceLocation TypeLoc::getTemplateKeywordLoc() const {
if (const auto TSTL = getAsAdjusted<TemplateSpecializationTypeLoc>())
return TSTL.getTemplateKeywordLoc();
- if (const auto DTSTL =
- getAsAdjusted<DependentTemplateSpecializationTypeLoc>())
- return DTSTL.getTemplateKeywordLoc();
return SourceLocation();
}
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 54ca42d..cd59678 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -237,7 +237,6 @@ bool TypePrinter::canPrefixQualifiers(const Type *T,
case Type::TemplateSpecialization:
case Type::InjectedClassName:
case Type::DependentName:
- case Type::DependentTemplateSpecialization:
case Type::ObjCObject:
case Type::ObjCTypeParam:
case Type::ObjCInterface:
@@ -1836,22 +1835,6 @@ void TypePrinter::printDependentNameBefore(const DependentNameType *T,
void TypePrinter::printDependentNameAfter(const DependentNameType *T,
raw_ostream &OS) {}
-void TypePrinter::printDependentTemplateSpecializationBefore(
- const DependentTemplateSpecializationType *T, raw_ostream &OS) {
- IncludeStrongLifetimeRAII Strong(Policy);
-
- OS << TypeWithKeyword::getKeywordName(T->getKeyword());
- if (T->getKeyword() != ElaboratedTypeKeyword::None)
- OS << " ";
-
- T->getDependentTemplateName().print(OS, Policy);
- printTemplateArgumentList(OS, T->template_arguments(), Policy);
- spaceBeforePlaceHolder(OS);
-}
-
-void TypePrinter::printDependentTemplateSpecializationAfter(
- const DependentTemplateSpecializationType *T, raw_ostream &OS) {}
-
void TypePrinter::printPackExpansionBefore(const PackExpansionType *T,
raw_ostream &OS) {
printBefore(T->getPattern(), OS);
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 653b381..1f0e007 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -1109,8 +1109,6 @@ const AstTypeMatcher<TemplateTypeParmType> templateTypeParmType;
const AstTypeMatcher<InjectedClassNameType> injectedClassNameType;
const AstTypeMatcher<DecayedType> decayedType;
const AstTypeMatcher<DependentNameType> dependentNameType;
-const AstTypeMatcher<DependentTemplateSpecializationType>
- dependentTemplateSpecializationType;
AST_TYPELOC_TRAVERSE_MATCHER_DEF(hasElementType,
AST_POLYMORPHIC_SUPPORTED_TYPES(ArrayType,
ComplexType));
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 48a7b91..01c03f3 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -222,7 +222,6 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(declRefExpr);
REGISTER_MATCHER(dependentNameType);
REGISTER_MATCHER(dependentScopeDeclRefExpr);
- REGISTER_MATCHER(dependentTemplateSpecializationType);
REGISTER_MATCHER(declStmt);
REGISTER_MATCHER(declaratorDecl);
REGISTER_MATCHER(decltypeType);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 823bdcc..d25ce31 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2339,10 +2339,10 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
}
-// Generalize pointer types to a void pointer with the qualifiers of the
-// originally pointed-to type, e.g. 'const char *' and 'char * const *'
-// generalize to 'const void *' while 'char *' and 'const char **' generalize to
-// 'void *'.
+// If `GeneralizePointers` is true, generalizes types to a void pointer with the
+// qualifiers of the originally pointed-to type, e.g. 'const char *' and 'char *
+// const *' generalize to 'const void *' while 'char *' and 'const char **'
+// generalize to 'void *'.
static QualType GeneralizeType(ASTContext &Ctx, QualType Ty,
bool GeneralizePointers) {
// TODO: Add other generalizations.
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index f4baf8c..82b71e3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -4304,9 +4304,11 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
// size in bytes.
if (Ops.size() == 5) {
Function *StreamingVectorLength =
- CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+ CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
llvm::Value *StreamingVectorLengthCall =
- Builder.CreateCall(StreamingVectorLength);
+ Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
+ llvm::ConstantInt::get(Int64Ty, 8), "svl",
+ /* HasNUW */ true, /* HasNSW */ true);
llvm::Value *Mulvl =
Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
// The type of the ptr parameter is void *, so use Int8Ty here.
@@ -4918,6 +4920,26 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
// Handle builtins which require their multi-vector operands to be swapped
swapCommutativeSMEOperands(BuiltinID, Ops);
+ auto isCntsBuiltin = [&]() {
+ switch (BuiltinID) {
+ default:
+ return 0;
+ case SME::BI__builtin_sme_svcntsb:
+ return 8;
+ case SME::BI__builtin_sme_svcntsh:
+ return 4;
+ case SME::BI__builtin_sme_svcntsw:
+ return 2;
+ }
+ };
+
+ if (auto Mul = isCntsBuiltin()) {
+ llvm::Value *Cntd =
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
+ return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
+ "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
+ }
+
// Should not happen!
if (Builtin->LLVMIntrinsic == 0)
return nullptr;
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 24f435d..50c0683 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangFormat
MacroExpander.cpp
MatchFilePath.cpp
NamespaceEndCommentsFixer.cpp
+ NumericLiteralCaseFixer.cpp
NumericLiteralInfo.cpp
ObjCPropertyAttributeOrderFixer.cpp
QualifierAlignmentFixer.cpp
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index e3b22cd..f095d2c 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -16,6 +16,7 @@
#include "DefinitionBlockSeparator.h"
#include "IntegerLiteralSeparatorFixer.h"
#include "NamespaceEndCommentsFixer.h"
+#include "NumericLiteralCaseFixer.h"
#include "ObjCPropertyAttributeOrderFixer.h"
#include "QualifierAlignmentFixer.h"
#include "SortJavaScriptImports.h"
@@ -472,6 +473,25 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
}
};
+template <>
+struct ScalarEnumerationTraits<FormatStyle::NumericLiteralComponentStyle> {
+ static void enumeration(IO &IO,
+ FormatStyle::NumericLiteralComponentStyle &Value) {
+ IO.enumCase(Value, "Leave", FormatStyle::NLCS_Leave);
+ IO.enumCase(Value, "Upper", FormatStyle::NLCS_Upper);
+ IO.enumCase(Value, "Lower", FormatStyle::NLCS_Lower);
+ }
+};
+
+template <> struct MappingTraits<FormatStyle::NumericLiteralCaseStyle> {
+ static void mapping(IO &IO, FormatStyle::NumericLiteralCaseStyle &Value) {
+ IO.mapOptional("ExponentLetter", Value.ExponentLetter);
+ IO.mapOptional("HexDigit", Value.HexDigit);
+ IO.mapOptional("Prefix", Value.Prefix);
+ IO.mapOptional("Suffix", Value.Suffix);
+ }
+};
+
template <> struct ScalarEnumerationTraits<FormatStyle::OperandAlignmentStyle> {
static void enumeration(IO &IO, FormatStyle::OperandAlignmentStyle &Value) {
IO.enumCase(Value, "DontAlign", FormatStyle::OAS_DontAlign);
@@ -1121,6 +1141,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
IO.mapOptional("NamespaceMacros", Style.NamespaceMacros);
+ IO.mapOptional("NumericLiteralCase", Style.NumericLiteralCase);
IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList);
IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
IO.mapOptional("ObjCBreakBeforeNestedBlockParam",
@@ -1653,6 +1674,10 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.LineEnding = FormatStyle::LE_DeriveLF;
LLVMStyle.MaxEmptyLinesToKeep = 1;
LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
+ LLVMStyle.NumericLiteralCase = {/*ExponentLetter=*/FormatStyle::NLCS_Leave,
+ /*HexDigit=*/FormatStyle::NLCS_Leave,
+ /*Prefix=*/FormatStyle::NLCS_Leave,
+ /*Suffix=*/FormatStyle::NLCS_Leave};
LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto;
LLVMStyle.ObjCBlockIndentWidth = 2;
LLVMStyle.ObjCBreakBeforeNestedBlockParam = true;
@@ -3890,6 +3915,10 @@ reformat(const FormatStyle &Style, StringRef Code,
return IntegerLiteralSeparatorFixer().process(Env, Expanded);
});
+ Passes.emplace_back([&](const Environment &Env) {
+ return NumericLiteralCaseFixer().process(Env, Expanded);
+ });
+
if (Style.isCpp()) {
if (Style.QualifierAlignment != FormatStyle::QAS_Leave)
addQualifierAlignmentFixerPasses(Expanded, Passes);
diff --git a/clang/lib/Format/NumericLiteralCaseFixer.cpp b/clang/lib/Format/NumericLiteralCaseFixer.cpp
new file mode 100644
index 0000000..b58b3c7
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralCaseFixer.cpp
@@ -0,0 +1,177 @@
+//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements NumericLiteralCaseFixer that standardizes character
+/// case within numeric literals.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralCaseFixer.h"
+#include "NumericLiteralInfo.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
+ // Check if language is supported.
+ switch (Style.Language) {
+ case FormatStyle::LK_C:
+ case FormatStyle::LK_Cpp:
+ case FormatStyle::LK_ObjC:
+ case FormatStyle::LK_CSharp:
+ case FormatStyle::LK_Java:
+ case FormatStyle::LK_JavaScript:
+ break;
+ default:
+ return false;
+ }
+
+ // Check if style options are set.
+ const auto &Option = Style.NumericLiteralCase;
+ const auto Leave = FormatStyle::NLCS_Leave;
+ return Option.Prefix != Leave || Option.HexDigit != Leave ||
+ Option.ExponentLetter != Leave || Option.Suffix != Leave;
+}
+
+static std::string
+transformComponent(StringRef Component,
+ FormatStyle::NumericLiteralComponentStyle ConfigValue) {
+ switch (ConfigValue) {
+ case FormatStyle::NLCS_Upper:
+ return Component.upper();
+ case FormatStyle::NLCS_Lower:
+ return Component.lower();
+ default:
+ // Covers FormatStyle::NLCS_Leave.
+ return Component.str();
+ }
+}
+
+/// Test if Suffix matches a C++ literal reserved by the library.
+/// Matches against all suffixes reserved in the C++23 standard.
+static bool matchesReservedSuffix(StringRef Suffix) {
+ static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
+ "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y",
+ };
+
+ // This can be static_assert when we have access to constexpr is_sorted in
+ // C++ 20.
+ assert(llvm::is_sorted(SortedReservedSuffixes) &&
+ "Must be sorted as precondition for lower_bound().");
+
+ auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix);
+ if (entry == SortedReservedSuffixes.cend())
+ return false;
+ return *entry == Suffix;
+}
+
+static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
+ const char Separator = Style.isCpp() ? '\'' : '_';
+ const NumericLiteralInfo Info(NumericLiteral, Separator);
+ const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos;
+ const bool HasExponent = Info.ExponentLetterPos != StringRef::npos;
+ const bool HasSuffix = Info.SuffixPos != StringRef::npos;
+
+ std::string Formatted;
+
+ if (HasBaseLetter) {
+ Formatted +=
+ transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos),
+ Style.NumericLiteralCase.Prefix);
+ }
+ // Reformat this slice as HexDigit whether or not the digit has hexadecimal
+ // characters because binary/decimal/octal digits are unchanged.
+ Formatted += transformComponent(
+ NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0,
+ HasExponent ? Info.ExponentLetterPos
+ : HasSuffix ? Info.SuffixPos
+ : NumericLiteral.size()),
+ Style.NumericLiteralCase.HexDigit);
+
+ if (HasExponent) {
+ Formatted += transformComponent(
+ NumericLiteral.slice(Info.ExponentLetterPos,
+ HasSuffix ? Info.SuffixPos
+ : NumericLiteral.size()),
+ Style.NumericLiteralCase.ExponentLetter);
+ }
+
+ if (HasSuffix) {
+ StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
+ if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') {
+ // In C++, it is idiomatic, but NOT standardized to define user-defined
+ // literals with a leading '_'. Omit user defined literals and standard
+ // reserved suffixes from transformation.
+ Formatted += Suffix.str();
+ } else {
+ Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix);
+ }
+ }
+
+ return Formatted;
+}
+
+std::pair<tooling::Replacements, unsigned>
+NumericLiteralCaseFixer::process(const Environment &Env,
+ const FormatStyle &Style) {
+ if (!isNumericLiteralCaseFixerNeeded(Style))
+ return {};
+
+ const auto &SourceMgr = Env.getSourceManager();
+ AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
+
+ const auto ID = Env.getFileID();
+ const auto LangOpts = getFormattingLangOpts(Style);
+ Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+ Lex.SetCommentRetentionState(true);
+
+ Token Tok;
+ tooling::Replacements Result;
+
+ for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
+ // Skip tokens that are too small to contain a formattable literal.
+ // Size=2 is the smallest possible literal that could contain formattable
+ // components, for example "1u".
+ auto Length = Tok.getLength();
+ if (Length < 2)
+ continue;
+
+ // Service clang-format off/on comments.
+ auto Location = Tok.getLocation();
+ auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+ if (Tok.is(tok::comment)) {
+ if (isClangFormatOff(Text))
+ Skip = true;
+ else if (isClangFormatOn(Text))
+ Skip = false;
+ continue;
+ }
+
+ if (Skip || Tok.isNot(tok::numeric_constant) ||
+ !AffectedRangeMgr.affectsCharSourceRange(
+ CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
+ continue;
+ }
+
+ const auto Formatted = format(Text, Style);
+ if (Formatted != Text) {
+ cantFail(Result.add(
+ tooling::Replacement(SourceMgr, Location, Length, Formatted)));
+ }
+ }
+
+ return {Result, 0};
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/NumericLiteralCaseFixer.h b/clang/lib/Format/NumericLiteralCaseFixer.h
new file mode 100644
index 0000000..ac3ac30
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralCaseFixer.h
@@ -0,0 +1,32 @@
+//===--- NumericLiteralCaseFixer.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares NumericLiteralCaseFixer that standardizes character case
+/// within numeric literals.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class NumericLiteralCaseFixer {
+public:
+ std::pair<tooling::Replacements, unsigned> process(const Environment &Env,
+ const FormatStyle &Style);
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 931766d..76131081 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1975,9 +1975,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
}
const llvm::Triple::ArchType DebugEntryValueArchs[] = {
- llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
- llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
- llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el};
+ llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
+ llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
+ llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el,
+ llvm::Triple::riscv32, llvm::Triple::riscv64};
if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
llvm::is_contained(DebugEntryValueArchs, T.getArch()))
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index e65c8b4..edf0a09 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -766,7 +766,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
Builder.defineMacro("__cpp_pack_indexing", "202311L");
Builder.defineMacro("__cpp_deleted_function", "202403L");
Builder.defineMacro("__cpp_variadic_friend", "202403L");
- // Builder.defineMacro("__cpp_trivial_relocatability", "202502L");
+ Builder.defineMacro("__cpp_trivial_relocatability", "202502L");
if (LangOpts.Char8)
Builder.defineMacro("__cpp_char8_t", "202207L");
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index ede67af..8396533 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -20,6 +20,14 @@
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
* but that's because icc can emulate these without f16c using a library call.
* Since we don't do that let's leave these in f16cintrin.h.
@@ -35,7 +43,7 @@
/// \param __a
/// A 16-bit half-precision float value.
/// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS128
+static __inline float __DEFAULT_FN_ATTRS128_CONSTEXPR
_cvtsh_ss(unsigned short __a)
{
return (float)__builtin_bit_cast(__fp16, __a);
@@ -104,7 +112,7 @@ _cvtsh_ss(unsigned short __a)
/// A 128-bit vector containing 16-bit half-precision float values. The lower
/// 64 bits are used in the conversion.
/// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_cvtph_ps(__m128i __a)
{
typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
@@ -151,7 +159,7 @@ _mm_cvtph_ps(__m128i __a)
/// converted to 32-bit single-precision float values.
/// \returns A vector of [8 x float] containing the converted 32-bit
/// single-precision float values.
-static __inline __m256 __DEFAULT_FN_ATTRS256
+static __inline __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtph_ps(__m128i __a)
{
typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
@@ -161,5 +169,7 @@ _mm256_cvtph_ps(__m128i __a)
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#endif /* __F16CINTRIN_H */
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 043e0c1..84f1c36 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -647,6 +647,8 @@ llvm::Error Interpreter::CreateExecutor(JITConfig Config) {
auto JTMB = createJITTargetMachineBuilder(TT);
if (!JTMB)
return JTMB.takeError();
+ if (Config.CM)
+ JTMB->setCodeModel(Config.CM);
auto JB = IncrementalExecutor::createDefaultJITBuilder(std::move(*JTMB));
if (!JB)
return JB.takeError();
diff --git a/clang/lib/Sema/HeuristicResolver.cpp b/clang/lib/Sema/HeuristicResolver.cpp
index 6d79f3f..a5d1f5d 100644
--- a/clang/lib/Sema/HeuristicResolver.cpp
+++ b/clang/lib/Sema/HeuristicResolver.cpp
@@ -13,7 +13,6 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
-#include "llvm/ADT/identity.h"
namespace clang {
@@ -42,8 +41,8 @@ public:
resolveUsingValueDecl(const UnresolvedUsingValueDecl *UUVD);
std::vector<const NamedDecl *>
resolveDependentNameType(const DependentNameType *DNT);
- std::vector<const NamedDecl *> resolveTemplateSpecializationType(
- const DependentTemplateSpecializationType *DTST);
+ std::vector<const NamedDecl *>
+ resolveTemplateSpecializationType(const TemplateSpecializationType *TST);
QualType resolveNestedNameSpecifierToType(NestedNameSpecifier NNS);
QualType getPointeeType(QualType T);
std::vector<const NamedDecl *>
@@ -374,8 +373,9 @@ HeuristicResolverImpl::resolveDependentNameType(const DependentNameType *DNT) {
std::vector<const NamedDecl *>
HeuristicResolverImpl::resolveTemplateSpecializationType(
- const DependentTemplateSpecializationType *DTST) {
- const DependentTemplateStorage &DTN = DTST->getDependentTemplateName();
+ const TemplateSpecializationType *TST) {
+ const DependentTemplateStorage &DTN =
+ *TST->getTemplateName().getAsDependentTemplateName();
return resolveDependentMember(
resolveNestedNameSpecifierToType(DTN.getQualifier()),
DTN.getName().getIdentifier(), TemplateFilter);
@@ -562,7 +562,7 @@ HeuristicResolverImpl::getFunctionProtoTypeLoc(const Expr *Fn) {
// In some edge cases the AST can contain a "trivial" FunctionProtoTypeLoc
// which has null parameters. Avoid these as they don't contain useful
// information.
- if (llvm::all_of(F.getParams(), llvm::identity<ParmVarDecl *>()))
+ if (!llvm::is_contained(F.getParams(), nullptr))
return F;
}
@@ -597,8 +597,8 @@ std::vector<const NamedDecl *> HeuristicResolver::resolveDependentNameType(
}
std::vector<const NamedDecl *>
HeuristicResolver::resolveTemplateSpecializationType(
- const DependentTemplateSpecializationType *DTST) const {
- return HeuristicResolverImpl(Ctx).resolveTemplateSpecializationType(DTST);
+ const TemplateSpecializationType *TST) const {
+ return HeuristicResolverImpl(Ctx).resolveTemplateSpecializationType(TST);
}
QualType HeuristicResolver::resolveNestedNameSpecifierToType(
NestedNameSpecifier NNS) const {
diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp
index 3eed6ad..8411a3d 100644
--- a/clang/lib/Sema/SemaAttr.cpp
+++ b/clang/lib/Sema/SemaAttr.cpp
@@ -157,8 +157,8 @@ void Sema::inferGslPointerAttribute(TypedefNameDecl *TD) {
if (auto *TST =
dyn_cast<TemplateSpecializationType>(Canonical.getTypePtr())) {
- RD = dyn_cast_or_null<CXXRecordDecl>(
- TST->getTemplateName().getAsTemplateDecl()->getTemplatedDecl());
+ if (const auto *TD = TST->getTemplateName().getAsTemplateDecl())
+ RD = dyn_cast_or_null<CXXRecordDecl>(TD->getTemplatedDecl());
}
}
diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp
index 437c69a..e89243b 100644
--- a/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -896,64 +896,15 @@ bool Sema::ActOnCXXNestedNameSpecifier(Scope *S,
if (SS.isInvalid())
return true;
- TemplateName Template = OpaqueTemplate.get();
-
// Translate the parser's template argument list in our AST format.
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc);
translateTemplateArguments(TemplateArgsIn, TemplateArgs);
- DependentTemplateName *DTN = Template.getAsDependentTemplateName();
- if (DTN && DTN->getName().getIdentifier()) {
- // Handle a dependent template specialization for which we cannot resolve
- // the template name.
- assert(DTN->getQualifier() == SS.getScopeRep());
- QualType T = Context.getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword::None,
- {SS.getScopeRep(), DTN->getName().getIdentifier(),
- TemplateKWLoc.isValid()},
- TemplateArgs.arguments());
-
- // Create source-location information for this type.
- TypeLocBuilder Builder;
- DependentTemplateSpecializationTypeLoc SpecTL
- = Builder.push<DependentTemplateSpecializationTypeLoc>(T);
- SpecTL.setElaboratedKeywordLoc(SourceLocation());
- SpecTL.setQualifierLoc(SS.getWithLocInContext(Context));
- SpecTL.setTemplateKeywordLoc(TemplateKWLoc);
- SpecTL.setTemplateNameLoc(TemplateNameLoc);
- SpecTL.setLAngleLoc(LAngleLoc);
- SpecTL.setRAngleLoc(RAngleLoc);
- for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I)
- SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo());
-
- SS.clear();
- SS.Make(Context, Builder.getTypeLocInContext(Context, T), CCLoc);
- return false;
- }
-
- // If we assumed an undeclared identifier was a template name, try to
- // typo-correct it now.
- if (Template.getAsAssumedTemplateName() &&
- resolveAssumedTemplateNameAsType(S, Template, TemplateNameLoc))
- return true;
-
- TemplateDecl *TD = Template.getAsTemplateDecl();
- if (Template.getAsOverloadedTemplate() || DTN ||
- isa<FunctionTemplateDecl>(TD) || isa<VarTemplateDecl>(TD)) {
- SourceRange R(TemplateNameLoc, RAngleLoc);
- if (SS.getRange().isValid())
- R.setBegin(SS.getRange().getBegin());
-
- Diag(CCLoc, diag::err_non_type_template_in_nested_name_specifier)
- << isa_and_nonnull<VarTemplateDecl>(TD) << Template << R;
- NoteAllFoundTemplates(Template);
- return true;
- }
-
// We were able to resolve the template name to an actual template.
// Build an appropriate nested-name-specifier.
- QualType T = CheckTemplateIdType(ElaboratedTypeKeyword::None, Template,
- TemplateNameLoc, TemplateArgs);
+ QualType T = CheckTemplateIdType(
+ ElaboratedTypeKeyword::None, OpaqueTemplate.get(), TemplateNameLoc,
+ TemplateArgs, /*Scope=*/S, /*ForNestedNameSpecifier=*/true);
if (T.isNull())
return true;
@@ -961,7 +912,7 @@ bool Sema::ActOnCXXNestedNameSpecifier(Scope *S,
// nested name specifiers.
if (!T->isDependentType() && !isa<TagType>(T.getCanonicalType())) {
Diag(TemplateNameLoc, diag::err_nested_name_spec_non_tag) << T;
- NoteAllFoundTemplates(Template);
+ NoteAllFoundTemplates(OpaqueTemplate.get());
return true;
}
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index cc03616..229e91e 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -90,7 +90,8 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD,
// Build the template-id.
QualType CoroTrait = S.CheckTemplateIdType(
- ElaboratedTypeKeyword::None, TemplateName(CoroTraits), KwLoc, Args);
+ ElaboratedTypeKeyword::None, TemplateName(CoroTraits), KwLoc, Args,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
if (CoroTrait.isNull())
return QualType();
if (S.RequireCompleteType(KwLoc, CoroTrait,
@@ -163,7 +164,8 @@ static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType,
// Build the template-id.
QualType CoroHandleType = S.CheckTemplateIdType(
- ElaboratedTypeKeyword::None, TemplateName(CoroHandle), Loc, Args);
+ ElaboratedTypeKeyword::None, TemplateName(CoroHandle), Loc, Args,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
if (CoroHandleType.isNull())
return QualType();
if (S.RequireCompleteType(Loc, CoroHandleType,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 7c1459e..2b0ddb5 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6392,12 +6392,6 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
NextTL =
TL.castAs<DependentNameTypeLoc>().getQualifierLoc().getAsTypeLoc();
break;
- case TypeLoc::DependentTemplateSpecialization: {
- auto TST = TL.castAs<DependentTemplateSpecializationTypeLoc>();
- TemplateKeywordLoc = TST.getTemplateKeywordLoc();
- NextTL = TST.getQualifierLoc().getAsTypeLoc();
- break;
- }
default:
break;
}
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 63ce87b..8008c7b 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -1138,8 +1138,9 @@ static QualType getStdTrait(Sema &S, SourceLocation Loc, StringRef Trait,
}
// Build the template-id.
- QualType TraitTy = S.CheckTemplateIdType(ElaboratedTypeKeyword::None,
- TemplateName(TraitTD), Loc, Args);
+ QualType TraitTy = S.CheckTemplateIdType(
+ ElaboratedTypeKeyword::None, TemplateName(TraitTD), Loc, Args,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
if (TraitTy.isNull())
return QualType();
@@ -12315,7 +12316,8 @@ static QualType BuildStdClassTemplate(Sema &S, ClassTemplateDecl *CTD,
Args.addArgument(TemplateArgumentLoc(TemplateArgument(TypeParam), TSI));
return S.CheckTemplateIdType(ElaboratedTypeKeyword::None, TemplateName(CTD),
- Loc, Args);
+ Loc, Args, /*Scope=*/nullptr,
+ /*ForNestedNameSpecifier=*/false);
}
QualType Sema::BuildStdInitializerList(QualType Element, SourceLocation Loc) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index bd62ac6..4394442 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -21360,8 +21360,9 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
QualType TST;
{
SFINAETrap Trap(*this);
- TST = CheckTemplateIdType(ElaboratedTypeKeyword::None, TN,
- NameInfo.getBeginLoc(), TAL);
+ TST = CheckTemplateIdType(
+ ElaboratedTypeKeyword::None, TN, NameInfo.getBeginLoc(), TAL,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
}
if (TST.isNull())
TST = Context.getTemplateSpecializationType(
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 54918c5..25728de 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -4575,6 +4575,13 @@ static void getNestedNameSpecifierIdentifiers(
case Type::TemplateSpecialization: {
TemplateName Name =
cast<TemplateSpecializationType>(T)->getTemplateName();
+ if (const DependentTemplateName *DTN =
+ Name.getAsDependentTemplateName()) {
+ getNestedNameSpecifierIdentifiers(DTN->getQualifier(), Identifiers);
+ if (const auto *II = DTN->getName().getIdentifier())
+ Identifiers.push_back(II);
+ return;
+ }
if (const QualifiedTemplateName *QTN =
Name.getAsQualifiedTemplateName()) {
getNestedNameSpecifierIdentifiers(QTN->getQualifier(), Identifiers);
@@ -4584,15 +4591,6 @@ static void getNestedNameSpecifierIdentifiers(
Identifiers.push_back(TD->getIdentifier());
return;
}
- case Type::DependentTemplateSpecialization: {
- const DependentTemplateStorage &S =
- cast<DependentTemplateSpecializationType>(T)
- ->getDependentTemplateName();
- getNestedNameSpecifierIdentifiers(S.getQualifier(), Identifiers);
- // FIXME: Should this dig into the Name as well?
- // Identifiers.push_back(S.getName().getIdentifier());
- return;
- }
case Type::SubstTemplateTypeParm:
T = cast<SubstTemplateTypeParmType>(T)
->getReplacementType()
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 58dae32..d6b25c2 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2845,6 +2845,16 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier(
if (const TemplateSpecializationType *TST
= T->getAs<TemplateSpecializationType>()) {
+ TemplateName Name = TST->getTemplateName();
+ if (const auto *DTS = Name.getAsDependentTemplateName()) {
+ // Look one step prior in a dependent template specialization type.
+ if (NestedNameSpecifier NNS = DTS->getQualifier();
+ NNS.getKind() == NestedNameSpecifier::Kind::Type)
+ T = QualType(NNS.getAsType(), 0);
+ else
+ T = QualType();
+ continue;
+ }
if (TemplateDecl *Template = TST->getTemplateName().getAsTemplateDecl()) {
if (TypeDecl *Parent = dyn_cast<TypeDecl>(Template->getDeclContext()))
T = Context.getTypeDeclType(Parent);
@@ -2854,18 +2864,6 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier(
}
}
- // Look one step prior in a dependent template specialization type.
- if (const DependentTemplateSpecializationType *DependentTST
- = T->getAs<DependentTemplateSpecializationType>()) {
- if (NestedNameSpecifier NNS =
- DependentTST->getDependentTemplateName().getQualifier();
- NNS.getKind() == NestedNameSpecifier::Kind::Type)
- T = QualType(NNS.getAsType(), 0);
- else
- T = QualType();
- continue;
- }
-
// Look one step prior in a dependent name type.
if (const DependentNameType *DependentName = T->getAs<DependentNameType>()){
if (NestedNameSpecifier NNS = DependentName->getQualifier();
@@ -2985,16 +2983,16 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier(
continue;
}
- } else if (const TemplateSpecializationType *TST
- = T->getAs<TemplateSpecializationType>()) {
- if (TemplateDecl *Template = TST->getTemplateName().getAsTemplateDecl()) {
+ } else if (const auto *TST = T->getAs<TemplateSpecializationType>()) {
+ TemplateName Name = TST->getTemplateName();
+ if (TemplateDecl *Template = Name.getAsTemplateDecl()) {
ExpectedTemplateParams = Template->getTemplateParameters();
NeedNonemptyTemplateHeader = true;
+ } else if (Name.getAsDeducedTemplateName()) {
+ // FIXME: We actually could/should check the template arguments here
+ // against the corresponding template parameter list.
+ NeedNonemptyTemplateHeader = false;
}
- } else if (T->getAs<DependentTemplateSpecializationType>()) {
- // FIXME: We actually could/should check the template arguments here
- // against the corresponding template parameter list.
- NeedNonemptyTemplateHeader = false;
}
// C++ [temp.expl.spec]p16:
@@ -3203,8 +3201,9 @@ static QualType builtinCommonTypeImpl(Sema &S, ElaboratedTypeKeyword Keyword,
Sema::SFINAETrap SFINAE(S, /*ForValidityCheck=*/true);
Sema::ContextRAII TUContext(S, S.Context.getTranslationUnitDecl());
- QualType BaseTemplateInst =
- S.CheckTemplateIdType(Keyword, BaseTemplate, TemplateLoc, Args);
+ QualType BaseTemplateInst = S.CheckTemplateIdType(
+ Keyword, BaseTemplate, TemplateLoc, Args,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
if (SFINAE.hasErrorOccurred())
return QualType();
@@ -3422,7 +3421,9 @@ static QualType checkBuiltinTemplateIdType(
// The first template argument will be reused as the template decl that
// our synthetic template arguments will be applied to.
return SemaRef.CheckTemplateIdType(Keyword, Converted[0].getAsTemplate(),
- TemplateLoc, SyntheticTemplateArgs);
+ TemplateLoc, SyntheticTemplateArgs,
+ /*Scope=*/nullptr,
+ /*ForNestedNameSpecifier=*/false);
}
case BTK__type_pack_element: {
@@ -3467,7 +3468,8 @@ static QualType checkBuiltinTemplateIdType(
CT, TemplateArgs[1].getLocation())));
TemplateName HasTypeMember = Converted[1].getAsTemplate();
return SemaRef.CheckTemplateIdType(Keyword, HasTypeMember, TemplateLoc,
- TAs);
+ TAs, /*Scope=*/nullptr,
+ /*ForNestedNameSpecifier=*/false);
}
QualType HasNoTypeMember = Converted[2].getAsType();
return HasNoTypeMember;
@@ -3666,40 +3668,81 @@ Sema::findFailedBooleanCondition(Expr *Cond) {
return { FailedCond, Description };
}
+static TemplateName
+resolveAssumedTemplateNameAsType(Sema &S, Scope *Scope,
+ const AssumedTemplateStorage *ATN,
+ SourceLocation NameLoc) {
+ // We assumed this undeclared identifier to be an (ADL-only) function
+ // template name, but it was used in a context where a type was required.
+ // Try to typo-correct it now.
+ LookupResult R(S, ATN->getDeclName(), NameLoc, S.LookupOrdinaryName);
+ struct CandidateCallback : CorrectionCandidateCallback {
+ bool ValidateCandidate(const TypoCorrection &TC) override {
+ return TC.getCorrectionDecl() &&
+ getAsTypeTemplateDecl(TC.getCorrectionDecl());
+ }
+ std::unique_ptr<CorrectionCandidateCallback> clone() override {
+ return std::make_unique<CandidateCallback>(*this);
+ }
+ } FilterCCC;
+
+ TypoCorrection Corrected =
+ S.CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), Scope,
+ /*SS=*/nullptr, FilterCCC, CorrectTypoKind::ErrorRecovery);
+ if (Corrected && Corrected.getFoundDecl()) {
+ S.diagnoseTypo(Corrected, S.PDiag(diag::err_no_template_suggest)
+ << ATN->getDeclName());
+ return S.Context.getQualifiedTemplateName(
+ /*Qualifier=*/std::nullopt, /*TemplateKeyword=*/false,
+ TemplateName(Corrected.getCorrectionDeclAs<TemplateDecl>()));
+ }
+
+ return TemplateName();
+}
+
QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
TemplateName Name,
SourceLocation TemplateLoc,
- TemplateArgumentListInfo &TemplateArgs) {
- // FIXME: 'getUnderlying' loses SubstTemplateTemplateParm nodes from alias
- // template substitutions.
- if (DependentTemplateName *DTN =
- Name.getUnderlying().getAsDependentTemplateName();
- DTN && DTN->getName().getIdentifier())
- // When building a template-id where the template-name is dependent,
- // assume the template is a type template. Either our assumption is
- // correct, or the code is ill-formed and will be diagnosed when the
- // dependent name is substituted.
- return Context.getDependentTemplateSpecializationType(
- ElaboratedTypeKeyword::None, *DTN, TemplateArgs.arguments());
-
- if (Name.getAsAssumedTemplateName() &&
- resolveAssumedTemplateNameAsType(/*Scope=*/nullptr, Name, TemplateLoc))
- return QualType();
+ TemplateArgumentListInfo &TemplateArgs,
+ Scope *Scope, bool ForNestedNameSpecifier) {
+ auto [UnderlyingName, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
- TemplateDecl *Template;
- DefaultArguments DefaultArgs;
- if (const SubstTemplateTemplateParmPackStorage *S =
- Name.getAsSubstTemplateTemplateParmPack()) {
- Template = S->getParameterPack();
- } else {
- std::tie(Template, DefaultArgs) = Name.getTemplateDeclAndDefaultArgs();
- if (!Template || isa<FunctionTemplateDecl>(Template) ||
- isa<VarTemplateDecl>(Template) || isa<ConceptDecl>(Template)) {
- Diag(TemplateLoc, diag::err_template_id_not_a_type) << Name;
- NoteAllFoundTemplates(Name);
- return QualType();
+ TemplateDecl *Template = UnderlyingName.getAsTemplateDecl();
+ if (!Template) {
+ if (const auto *S = UnderlyingName.getAsSubstTemplateTemplateParmPack()) {
+ Template = S->getParameterPack();
+ } else if (const auto *DTN = UnderlyingName.getAsDependentTemplateName()) {
+ if (DTN->getName().getIdentifier())
+ // When building a template-id where the template-name is dependent,
+ // assume the template is a type template. Either our assumption is
+ // correct, or the code is ill-formed and will be diagnosed when the
+ // dependent name is substituted.
+ return Context.getTemplateSpecializationType(Keyword, Name,
+ TemplateArgs.arguments(),
+ /*CanonicalArgs=*/{});
+ } else if (const auto *ATN = UnderlyingName.getAsAssumedTemplateName()) {
+ if (TemplateName CorrectedName = ::resolveAssumedTemplateNameAsType(
+ *this, Scope, ATN, TemplateLoc);
+ CorrectedName.isNull()) {
+ Diag(TemplateLoc, diag::err_no_template) << ATN->getDeclName();
+ return QualType();
+ } else {
+ Name = CorrectedName;
+ Template = Name.getAsTemplateDecl();
+ }
}
}
+ if (!Template ||
+ isa<FunctionTemplateDecl, VarTemplateDecl, ConceptDecl>(Template)) {
+ SourceRange R(TemplateLoc, TemplateArgs.getRAngleLoc());
+ if (ForNestedNameSpecifier)
+ Diag(TemplateLoc, diag::err_non_type_template_in_nested_name_specifier)
+ << isa_and_nonnull<VarTemplateDecl>(Template) << Name << R;
+ else
+ Diag(TemplateLoc, diag::err_template_id_not_a_type) << Name << R;
+ NoteAllFoundTemplates(Name);
+ return QualType();
+ }
// Check that the template argument list is well-formed for this
// template.
@@ -3810,6 +3853,7 @@ QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
//
// template<typename T, typename U = T> struct A;
CanonType = Context.getCanonicalTemplateSpecializationType(
+ ElaboratedTypeKeyword::None,
Context.getCanonicalTemplateName(Name, /*IgnoreDeduced=*/true),
CTAI.CanonicalConverted);
assert(CanonType->isCanonicalUnqualified());
@@ -3908,55 +3952,19 @@ void Sema::ActOnUndeclaredTypeTemplateName(Scope *S, TemplateTy &ParsedName,
IdentifierInfo *&II) {
assert(TNK == TNK_Undeclared_template && "not an undeclared template name");
- TemplateName Name = ParsedName.get();
- auto *ATN = Name.getAsAssumedTemplateName();
+ auto *ATN = ParsedName.get().getAsAssumedTemplateName();
assert(ATN && "not an assumed template name");
II = ATN->getDeclName().getAsIdentifierInfo();
- if (!resolveAssumedTemplateNameAsType(S, Name, NameLoc, /*Diagnose*/false)) {
+ if (TemplateName Name =
+ ::resolveAssumedTemplateNameAsType(*this, S, ATN, NameLoc);
+ !Name.isNull()) {
// Resolved to a type template name.
ParsedName = TemplateTy::make(Name);
TNK = TNK_Type_template;
}
}
-bool Sema::resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name,
- SourceLocation NameLoc,
- bool Diagnose) {
- // We assumed this undeclared identifier to be an (ADL-only) function
- // template name, but it was used in a context where a type was required.
- // Try to typo-correct it now.
- AssumedTemplateStorage *ATN = Name.getAsAssumedTemplateName();
- assert(ATN && "not an assumed template name");
-
- LookupResult R(*this, ATN->getDeclName(), NameLoc, LookupOrdinaryName);
- struct CandidateCallback : CorrectionCandidateCallback {
- bool ValidateCandidate(const TypoCorrection &TC) override {
- return TC.getCorrectionDecl() &&
- getAsTypeTemplateDecl(TC.getCorrectionDecl());
- }
- std::unique_ptr<CorrectionCandidateCallback> clone() override {
- return std::make_unique<CandidateCallback>(*this);
- }
- } FilterCCC;
-
- TypoCorrection Corrected =
- CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, nullptr,
- FilterCCC, CorrectTypoKind::ErrorRecovery);
- if (Corrected && Corrected.getFoundDecl()) {
- diagnoseTypo(Corrected, PDiag(diag::err_no_template_suggest)
- << ATN->getDeclName());
- Name = Context.getQualifiedTemplateName(
- /*Qualifier=*/std::nullopt, /*TemplateKeyword=*/false,
- TemplateName(Corrected.getCorrectionDeclAs<TemplateDecl>()));
- return false;
- }
-
- if (Diagnose)
- Diag(R.getNameLoc(), diag::err_no_template) << R.getLookupName();
- return true;
-}
-
TypeResult Sema::ActOnTemplateIdType(
Scope *S, ElaboratedTypeKeyword ElaboratedKeyword,
SourceLocation ElaboratedKeywordLoc, CXXScopeSpec &SS,
@@ -4013,36 +4021,13 @@ TypeResult Sema::ActOnTemplateIdType(
}
}
- TemplateName Template = TemplateD.get();
- if (Template.getAsAssumedTemplateName() &&
- resolveAssumedTemplateNameAsType(S, Template, TemplateIILoc))
- return true;
-
// Translate the parser's template argument list in our AST format.
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc);
translateTemplateArguments(TemplateArgsIn, TemplateArgs);
- if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) {
- assert(SS.getScopeRep() == DTN->getQualifier());
- QualType T = Context.getDependentTemplateSpecializationType(
- ElaboratedKeyword, *DTN, TemplateArgs.arguments());
- // Build type-source information.
- TypeLocBuilder TLB;
- DependentTemplateSpecializationTypeLoc SpecTL
- = TLB.push<DependentTemplateSpecializationTypeLoc>(T);
- SpecTL.setElaboratedKeywordLoc(ElaboratedKeywordLoc);
- SpecTL.setQualifierLoc(SS.getWithLocInContext(Context));
- SpecTL.setTemplateKeywordLoc(TemplateKWLoc);
- SpecTL.setTemplateNameLoc(TemplateIILoc);
- SpecTL.setLAngleLoc(LAngleLoc);
- SpecTL.setRAngleLoc(RAngleLoc);
- for (unsigned I = 0, N = SpecTL.getNumArgs(); I != N; ++I)
- SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo());
- return CreateParsedType(T, TLB.getTypeSourceInfo(Context, T));
- }
-
- QualType SpecTy = CheckTemplateIdType(ElaboratedKeyword, Template,
- TemplateIILoc, TemplateArgs);
+ QualType SpecTy = CheckTemplateIdType(
+ ElaboratedKeyword, TemplateD.get(), TemplateIILoc, TemplateArgs,
+ /*Scope=*/S, /*ForNestedNameSpecifier=*/false);
if (SpecTy.isNull())
return true;
@@ -4067,8 +4052,6 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK,
if (SS.isInvalid())
return TypeResult(true);
- TemplateName Template = TemplateD.get();
-
// Translate the parser's template argument list in our AST format.
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc);
translateTemplateArguments(TemplateArgsIn, TemplateArgs);
@@ -4078,28 +4061,9 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK,
ElaboratedTypeKeyword Keyword
= TypeWithKeyword::getKeywordForTagTypeKind(TagKind);
- if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) {
- assert(SS.getScopeRep() == DTN->getQualifier());
- QualType T = Context.getDependentTemplateSpecializationType(
- Keyword, *DTN, TemplateArgs.arguments());
-
- // Build type-source information.
- TypeLocBuilder TLB;
- DependentTemplateSpecializationTypeLoc SpecTL
- = TLB.push<DependentTemplateSpecializationTypeLoc>(T);
- SpecTL.setElaboratedKeywordLoc(TagLoc);
- SpecTL.setQualifierLoc(SS.getWithLocInContext(Context));
- SpecTL.setTemplateKeywordLoc(TemplateKWLoc);
- SpecTL.setTemplateNameLoc(TemplateLoc);
- SpecTL.setLAngleLoc(LAngleLoc);
- SpecTL.setRAngleLoc(RAngleLoc);
- for (unsigned I = 0, N = SpecTL.getNumArgs(); I != N; ++I)
- SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo());
- return CreateParsedType(T, TLB.getTypeSourceInfo(Context, T));
- }
-
QualType Result =
- CheckTemplateIdType(Keyword, Template, TemplateLoc, TemplateArgs);
+ CheckTemplateIdType(Keyword, TemplateD.get(), TemplateLoc, TemplateArgs,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
if (Result.isNull())
return TypeResult(true);
@@ -6389,11 +6353,6 @@ bool UnnamedLocalNoLinkageFinder::VisitDependentNameType(
return VisitNestedNameSpecifier(T->getQualifier());
}
-bool UnnamedLocalNoLinkageFinder::VisitDependentTemplateSpecializationType(
- const DependentTemplateSpecializationType* T) {
- return VisitNestedNameSpecifier(T->getDependentTemplateName().getQualifier());
-}
-
bool UnnamedLocalNoLinkageFinder::VisitPackExpansionType(
const PackExpansionType* T) {
return Visit(T->getPattern());
@@ -7832,8 +7791,10 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param,
bool PartialOrdering,
bool *StrictPackMatch) {
TemplateName Name = Arg.getArgument().getAsTemplateOrTemplatePattern();
- auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
+ auto [UnderlyingName, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
+ TemplateDecl *Template = UnderlyingName.getAsTemplateDecl();
if (!Template) {
+ // FIXME: Handle AssumedTemplateNames
// Any dependent template name is fine.
assert(Name.isDependent() && "Non-dependent template isn't a declaration?");
return false;
@@ -8949,6 +8910,7 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
} else {
CanQualType CanonType = CanQualType::CreateUnsafe(
Context.getCanonicalTemplateSpecializationType(
+ ElaboratedTypeKeyword::None,
TemplateName(ClassTemplate->getCanonicalDecl()),
CTAI.CanonicalConverted));
if (Context.hasSameType(
@@ -11128,43 +11090,11 @@ Sema::ActOnTypenameType(Scope *S, SourceLocation TypenameLoc,
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc);
translateTemplateArguments(TemplateArgsIn, TemplateArgs);
- auto Keyword = TypenameLoc.isValid() ? ElaboratedTypeKeyword::Typename
- : ElaboratedTypeKeyword::None;
-
- TemplateName Template = TemplateIn.get();
- if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) {
- // Construct a dependent template specialization type.
- assert(DTN && "dependent template has non-dependent name?");
- assert(DTN->getQualifier() == SS.getScopeRep());
-
- if (!DTN->getName().getIdentifier()) {
- Diag(TemplateIILoc, diag::err_template_id_not_a_type) << Template;
- NoteAllFoundTemplates(Template);
- return true;
- }
-
- QualType T = Context.getDependentTemplateSpecializationType(
- Keyword, *DTN, TemplateArgs.arguments());
-
- // Create source-location information for this type.
- TypeLocBuilder Builder;
- DependentTemplateSpecializationTypeLoc SpecTL
- = Builder.push<DependentTemplateSpecializationTypeLoc>(T);
- SpecTL.setElaboratedKeywordLoc(TypenameLoc);
- SpecTL.setQualifierLoc(SS.getWithLocInContext(Context));
- SpecTL.setTemplateKeywordLoc(TemplateKWLoc);
- SpecTL.setTemplateNameLoc(TemplateIILoc);
- SpecTL.setLAngleLoc(LAngleLoc);
- SpecTL.setRAngleLoc(RAngleLoc);
- for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I)
- SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo());
- return CreateParsedType(T, Builder.getTypeSourceInfo(Context, T));
- }
-
- QualType T = CheckTemplateIdType(TypenameLoc.isValid()
- ? ElaboratedTypeKeyword::Typename
- : ElaboratedTypeKeyword::None,
- Template, TemplateIILoc, TemplateArgs);
+ QualType T = CheckTemplateIdType(
+ TypenameLoc.isValid() ? ElaboratedTypeKeyword::Typename
+ : ElaboratedTypeKeyword::None,
+ TemplateIn.get(), TemplateIILoc, TemplateArgs,
+ /*Scope=*/S, /*ForNestedNameSpecifier=*/false);
if (T.isNull())
return true;
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index cce40c0..64be2aa 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -696,6 +696,11 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
if (isa<TemplateSpecializationType>(P.getCanonicalType())) {
const TemplateSpecializationType *TP = ::getLastTemplateSpecType(P);
TNP = TP->getTemplateName();
+
+ // No deduction for specializations of dependent template names.
+ if (TNP.getAsDependentTemplateName())
+ return TemplateDeductionResult::Success;
+
// FIXME: To preserve sugar, the TST needs to carry sugared resolved
// arguments.
PResolved =
@@ -2540,7 +2545,6 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
case Type::Decltype:
case Type::UnaryTransform:
case Type::DeducedTemplateSpecialization:
- case Type::DependentTemplateSpecialization:
case Type::PackExpansion:
case Type::Pipe:
case Type::ArrayParameter:
@@ -6495,9 +6499,9 @@ Sema::getMoreSpecializedPartialSpecialization(
" the same template.");
TemplateName Name(PS1->getSpecializedTemplate()->getCanonicalDecl());
QualType PT1 = Context.getCanonicalTemplateSpecializationType(
- Name, PS1->getTemplateArgs().asArray());
+ ElaboratedTypeKeyword::None, Name, PS1->getTemplateArgs().asArray());
QualType PT2 = Context.getCanonicalTemplateSpecializationType(
- Name, PS2->getTemplateArgs().asArray());
+ ElaboratedTypeKeyword::None, Name, PS2->getTemplateArgs().asArray());
TemplateDeductionInfo Info(Loc);
return getMoreSpecialized(*this, PT1, PT2, PS1, PS2, Info);
@@ -6512,10 +6516,10 @@ bool Sema::isMoreSpecializedThanPrimary(
Primary->getInjectedTemplateArgs(Context));
Context.canonicalizeTemplateArguments(PrimaryCanonArgs);
- QualType PrimaryT =
- Context.getCanonicalTemplateSpecializationType(Name, PrimaryCanonArgs);
+ QualType PrimaryT = Context.getCanonicalTemplateSpecializationType(
+ ElaboratedTypeKeyword::None, Name, PrimaryCanonArgs);
QualType PartialT = Context.getCanonicalTemplateSpecializationType(
- Name, Spec->getTemplateArgs().asArray());
+ ElaboratedTypeKeyword::None, Name, Spec->getTemplateArgs().asArray());
VarTemplatePartialSpecializationDecl *MaybeSpec =
getMoreSpecialized(*this, PartialT, PrimaryT, Spec, Primary, Info);
@@ -6993,8 +6997,12 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
case Type::TemplateSpecialization: {
const TemplateSpecializationType *Spec
= cast<TemplateSpecializationType>(T);
- MarkUsedTemplateParameters(Ctx, Spec->getTemplateName(), OnlyDeduced,
- Depth, Used);
+
+ TemplateName Name = Spec->getTemplateName();
+ if (OnlyDeduced && Name.getAsDependentTemplateName())
+ break;
+
+ MarkUsedTemplateParameters(Ctx, Name, OnlyDeduced, Depth, Used);
// C++0x [temp.deduct.type]p9:
// If the template argument list of P contains a pack expansion that is
@@ -7030,31 +7038,6 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
OnlyDeduced, Depth, Used);
break;
- case Type::DependentTemplateSpecialization: {
- // C++14 [temp.deduct.type]p5:
- // The non-deduced contexts are:
- // -- The nested-name-specifier of a type that was specified using a
- // qualified-id
- //
- // C++14 [temp.deduct.type]p6:
- // When a type name is specified in a way that includes a non-deduced
- // context, all of the types that comprise that type name are also
- // non-deduced.
- if (OnlyDeduced)
- break;
-
- const DependentTemplateSpecializationType *Spec
- = cast<DependentTemplateSpecializationType>(T);
-
- MarkUsedTemplateParameters(Ctx,
- Spec->getDependentTemplateName().getQualifier(),
- OnlyDeduced, Depth, Used);
-
- for (const auto &Arg : Spec->template_arguments())
- MarkUsedTemplateParameters(Ctx, Arg, OnlyDeduced, Depth, Used);
- break;
- }
-
case Type::TypeOf:
if (!OnlyDeduced)
MarkUsedTemplateParameters(Ctx, cast<TypeOfType>(T)->getUnmodifiedType(),
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index b3cbd7f..df1a100 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -6951,8 +6951,9 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
Args.addArgument(
getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc));
}
- QualType T = CheckTemplateIdType(ElaboratedTypeKeyword::None,
- TemplateName(TD), Loc, Args);
+ QualType T = CheckTemplateIdType(
+ ElaboratedTypeKeyword::None, TemplateName(TD), Loc, Args,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
// We may get a non-null type with errors, in which case
// `getAsCXXRecordDecl` will return `nullptr`. For instance, this
// happens when one of the template arguments is an invalid
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 0f655d7..d723fb8 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -6036,15 +6036,6 @@ namespace {
assert(TInfo);
TL.copy(TInfo->getTypeLoc().castAs<DependentNameTypeLoc>());
}
- void VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc TL) {
- assert(DS.getTypeSpecType() == TST_typename);
- TypeSourceInfo *TInfo = nullptr;
- Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
- assert(TInfo);
- TL.copy(
- TInfo->getTypeLoc().castAs<DependentTemplateSpecializationTypeLoc>());
- }
void VisitAutoTypeLoc(AutoTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_auto ||
DS.getTypeSpecType() == TST_decltype_auto ||
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0587a7d..6136937 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -744,10 +744,11 @@ public:
StmtResult TransformSEHHandler(Stmt *Handler);
- QualType TransformDependentTemplateSpecializationType(
- TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL,
- QualType ObjectType, NamedDecl *UnqualLookup,
- bool AllowInjectedClassName);
+ QualType TransformTemplateSpecializationType(TypeLocBuilder &TLB,
+ TemplateSpecializationTypeLoc TL,
+ QualType ObjectType,
+ NamedDecl *FirstQualifierInScope,
+ bool AllowInjectedClassName);
QualType TransformTagType(TypeLocBuilder &TLB, TagTypeLoc TL);
@@ -1163,24 +1164,6 @@ public:
return SemaRef.BuildParenType(InnerType);
}
- /// Build a new typename type that refers to a template-id.
- ///
- /// By default, builds a new DependentNameType type from the
- /// nested-name-specifier and the given type. Subclasses may override
- /// this routine to provide different behavior.
- QualType RebuildDependentTemplateSpecializationType(
- ElaboratedTypeKeyword Keyword, SourceLocation TemplateKWLoc,
- TemplateName Name, SourceLocation NameLoc, TemplateArgumentListInfo &Args,
- bool AllowInjectedClassName) {
- // If it's still dependent, make a dependent specialization.
- if (const DependentTemplateStorage *S = Name.getAsDependentTemplateName())
- return SemaRef.Context.getDependentTemplateSpecializationType(
- Keyword, *S, Args.arguments());
-
- return getDerived().RebuildTemplateSpecializationType(Keyword, Name,
- NameLoc, Args);
- }
-
/// Build a new typename type that refers to an identifier.
///
/// By default, performs semantic analysis when building the typename type
@@ -5526,19 +5509,18 @@ QualType TreeTransform<Derived>::RebuildQualifiedType(QualType T,
template <typename Derived>
QualType TreeTransform<Derived>::TransformTypeInObjectScope(
TypeLocBuilder &TLB, TypeLoc TL, QualType ObjectType,
- NamedDecl *UnqualLookup) {
+ NamedDecl *FirstQualifierInScope) {
assert(!getDerived().AlreadyTransformed(TL.getType()));
switch (TL.getTypeLocClass()) {
- case TypeLoc::DependentTemplateSpecialization:
- return getDerived().TransformDependentTemplateSpecializationType(
- TLB, TL.castAs<DependentTemplateSpecializationTypeLoc>(), ObjectType,
- UnqualLookup, /*AllowInjectedClassName=*/true);
- case TypeLoc::DependentName: {
+ case TypeLoc::TemplateSpecialization:
+ return getDerived().TransformTemplateSpecializationType(
+ TLB, TL.castAs<TemplateSpecializationTypeLoc>(), ObjectType,
+ FirstQualifierInScope, /*AllowInjectedClassName=*/true);
+ case TypeLoc::DependentName:
return getDerived().TransformDependentNameType(
TLB, TL.castAs<DependentNameTypeLoc>(), /*DeducedTSTContext=*/false,
- ObjectType, UnqualLookup);
- }
+ ObjectType, FirstQualifierInScope);
default:
// Any dependent canonical type can appear here, through type alias
// templates.
@@ -7504,12 +7486,22 @@ QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
template <typename Derived>
QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
TypeLocBuilder &TLB, TemplateSpecializationTypeLoc TL) {
+ return getDerived().TransformTemplateSpecializationType(
+ TLB, TL, /*ObjectType=*/QualType(), /*FirstQualifierInScope=*/nullptr,
+ /*AllowInjectedClassName=*/false);
+}
+
+template <typename Derived>
+QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
+ TypeLocBuilder &TLB, TemplateSpecializationTypeLoc TL, QualType ObjectType,
+ NamedDecl *FirstQualifierInScope, bool AllowInjectedClassName) {
const TemplateSpecializationType *T = TL.getTypePtr();
NestedNameSpecifierLoc QualifierLoc = TL.getQualifierLoc();
TemplateName Template = getDerived().TransformTemplateName(
QualifierLoc, TL.getTemplateKeywordLoc(), T->getTemplateName(),
- TL.getTemplateNameLoc());
+ TL.getTemplateNameLoc(), ObjectType, FirstQualifierInScope,
+ AllowInjectedClassName);
if (Template.isNull())
return QualType();
@@ -7532,23 +7524,6 @@ QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
NewTemplateArgs);
if (!Result.isNull()) {
- // Specializations of template template parameters are represented as
- // TemplateSpecializationTypes, and substitution of type alias templates
- // within a dependent context can transform them into
- // DependentTemplateSpecializationTypes.
- if (isa<DependentTemplateSpecializationType>(Result)) {
- DependentTemplateSpecializationTypeLoc NewTL
- = TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
- NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
- NewTL.setQualifierLoc(QualifierLoc);
- NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
- NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
- NewTL.setLAngleLoc(TL.getLAngleLoc());
- NewTL.setRAngleLoc(TL.getRAngleLoc());
- for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
- NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
- return Result;
- }
TLB.push<TemplateSpecializationTypeLoc>(Result).set(
TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getTemplateKeywordLoc(),
TL.getTemplateNameLoc(), NewTemplateArgs);
@@ -7799,83 +7774,6 @@ QualType TreeTransform<Derived>::TransformDependentNameType(
return Result;
}
-template <typename Derived>
-QualType TreeTransform<Derived>::TransformDependentTemplateSpecializationType(
- TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL) {
- return getDerived().TransformDependentTemplateSpecializationType(
- TLB, TL, QualType(), nullptr, false);
-}
-
-template <typename Derived>
-QualType TreeTransform<Derived>::TransformDependentTemplateSpecializationType(
- TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL,
- QualType ObjectType, NamedDecl *UnqualLookup, bool AllowInjectedClassName) {
- const DependentTemplateSpecializationType *T = TL.getTypePtr();
-
- NestedNameSpecifierLoc QualifierLoc = TL.getQualifierLoc();
- if (QualifierLoc) {
- QualifierLoc = getDerived().TransformNestedNameSpecifierLoc(
- QualifierLoc, ObjectType, UnqualLookup);
- if (!QualifierLoc)
- return QualType();
- // These only apply to the leftmost prefix.
- ObjectType = QualType();
- UnqualLookup = nullptr;
- }
- CXXScopeSpec SS;
- SS.Adopt(QualifierLoc);
-
- TemplateArgumentListInfo NewTemplateArgs(TL.getLAngleLoc(),
- TL.getRAngleLoc());
- auto ArgsRange = llvm::make_range<TemplateArgumentLocContainerIterator<
- DependentTemplateSpecializationTypeLoc>>({TL, 0}, {TL, TL.getNumArgs()});
-
- if (getDerived().TransformTemplateArguments(ArgsRange.begin(),
- ArgsRange.end(), NewTemplateArgs))
- return QualType();
- bool TemplateArgumentsChanged = !llvm::equal(
- ArgsRange, NewTemplateArgs.arguments(),
- [](const TemplateArgumentLoc &A, const TemplateArgumentLoc &B) {
- return A.getArgument().structurallyEquals(B.getArgument());
- });
-
- const DependentTemplateStorage &DTN = T->getDependentTemplateName();
-
- QualType Result = TL.getType();
- if (getDerived().AlwaysRebuild() || SS.getScopeRep() != DTN.getQualifier() ||
- TemplateArgumentsChanged || !ObjectType.isNull()) {
- TemplateName Name = getDerived().RebuildTemplateName(
- SS, TL.getTemplateKeywordLoc(), DTN.getName(), TL.getTemplateNameLoc(),
- ObjectType, AllowInjectedClassName);
- if (Name.isNull())
- return QualType();
- Result = getDerived().RebuildDependentTemplateSpecializationType(
- T->getKeyword(), TL.getTemplateKeywordLoc(), Name,
- TL.getTemplateNameLoc(), NewTemplateArgs,
- /*AllowInjectedClassName=*/false);
- if (Result.isNull())
- return QualType();
- }
-
- QualifierLoc = SS.getWithLocInContext(SemaRef.Context);
- if (isa<TemplateSpecializationType>(Result)) {
- TLB.push<TemplateSpecializationTypeLoc>(Result).set(
- TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getTemplateKeywordLoc(),
- TL.getTemplateNameLoc(), NewTemplateArgs);
- } else {
- auto SpecTL = TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
- SpecTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
- SpecTL.setQualifierLoc(QualifierLoc);
- SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
- SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc());
- SpecTL.setLAngleLoc(TL.getLAngleLoc());
- SpecTL.setRAngleLoc(TL.getRAngleLoc());
- for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
- SpecTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
- }
- return Result;
-}
-
template<typename Derived>
QualType TreeTransform<Derived>::TransformPackExpansionType(TypeLocBuilder &TLB,
PackExpansionTypeLoc TL) {
@@ -17468,8 +17366,9 @@ template <typename Derived>
QualType TreeTransform<Derived>::RebuildTemplateSpecializationType(
ElaboratedTypeKeyword Keyword, TemplateName Template,
SourceLocation TemplateNameLoc, TemplateArgumentListInfo &TemplateArgs) {
- return SemaRef.CheckTemplateIdType(Keyword, Template, TemplateNameLoc,
- TemplateArgs);
+ return SemaRef.CheckTemplateIdType(
+ Keyword, Template, TemplateNameLoc, TemplateArgs,
+ /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false);
}
template<typename Derived>
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 1b3a8b1..5f40e94 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7532,20 +7532,6 @@ void TypeLocReader::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
-void TypeLocReader::VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc TL) {
- TL.setElaboratedKeywordLoc(readSourceLocation());
- TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
- TL.setTemplateKeywordLoc(readSourceLocation());
- TL.setTemplateNameLoc(readSourceLocation());
- TL.setLAngleLoc(readSourceLocation());
- TL.setRAngleLoc(readSourceLocation());
- for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
- TL.setArgLocInfo(I,
- Reader.readTemplateArgumentLocInfo(
- TL.getTypePtr()->template_arguments()[I].getKind()));
-}
-
void TypeLocReader::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
TL.setEllipsisLoc(readSourceLocation());
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a3a25e4..15a3ed4 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -659,18 +659,6 @@ void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
addSourceLocation(TL.getNameLoc());
}
-void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc TL) {
- addSourceLocation(TL.getElaboratedKeywordLoc());
- Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
- addSourceLocation(TL.getTemplateKeywordLoc());
- addSourceLocation(TL.getTemplateNameLoc());
- addSourceLocation(TL.getLAngleLoc());
- addSourceLocation(TL.getRAngleLoc());
- for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
- Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I));
-}
-
void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
addSourceLocation(TL.getEllipsisLoc());
}
@@ -1058,7 +1046,6 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(TYPE_TEMPLATE_TYPE_PARM);
RECORD(TYPE_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_NAME);
- RECORD(TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_SIZED_ARRAY);
RECORD(TYPE_PAREN);
RECORD(TYPE_MACRO_QUALIFIED);
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index 6f13d55..b629de3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -160,6 +160,11 @@ bool tryToFindPtrOrigin(
if (Name == "__builtin___CFStringMakeConstantString" ||
Name == "NSClassFromString")
return callback(E, true);
+ } else if (auto *CalleeE = call->getCallee()) {
+ if (auto *E = dyn_cast<DeclRefExpr>(CalleeE->IgnoreParenCasts())) {
+ if (isSingleton(E->getFoundDecl()))
+ return callback(E, true);
+ }
}
// Sometimes, canonical type erroneously turns Ref<T> into T.
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index 56747d7..90b2343 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -479,7 +479,7 @@ bool isTrivialBuiltinFunction(const FunctionDecl *F) {
Name.starts_with("os_log") || Name.starts_with("_os_log");
}
-bool isSingleton(const FunctionDecl *F) {
+bool isSingleton(const NamedDecl *F) {
assert(F);
// FIXME: check # of params == 1
if (auto *MethodDecl = dyn_cast<CXXMethodDecl>(F)) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
index 3c9560c..d2095d0 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
@@ -21,6 +21,7 @@ class CXXMethodDecl;
class CXXRecordDecl;
class Decl;
class FunctionDecl;
+class NamedDecl;
class QualType;
class RecordType;
class Stmt;
@@ -156,7 +157,7 @@ bool isPtrConversion(const FunctionDecl *F);
bool isTrivialBuiltinFunction(const FunctionDecl *F);
/// \returns true if \p F is a static singleton function.
-bool isSingleton(const FunctionDecl *F);
+bool isSingleton(const NamedDecl *F);
/// An inter-procedural analysis facility that detects functions with "trivial"
/// behavior with respect to reference counting, such as simple field getters.
diff --git a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
index d944411..c9108fc 100644
--- a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
+++ b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
@@ -157,12 +157,6 @@ SourceLocation StartLocationForType(TypeLoc TL) {
return QualifierLoc.getBeginLoc();
return TTL.getNameLoc();
}
- case TypeLoc::DependentTemplateSpecialization: {
- auto TTL = TL.castAs<DependentTemplateSpecializationTypeLoc>();
- if (NestedNameSpecifierLoc QualifierLoc = TTL.getQualifierLoc())
- return QualifierLoc.getBeginLoc();
- return TTL.getTemplateNameLoc();
- }
default:
llvm_unreachable("unhandled TypeLoc class");
}
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index b75f8ff..90fd1f9 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -974,13 +974,6 @@ public:
BeginLoc = TST.getTemplateNameLoc();
return buildSimpleTemplateName({BeginLoc, TST.getEndLoc()});
}
- case TypeLoc::DependentTemplateSpecialization: {
- auto DT = TL.castAs<DependentTemplateSpecializationTypeLoc>();
- SourceLocation BeginLoc = DT.getTemplateKeywordLoc();
- if (BeginLoc.isInvalid())
- BeginLoc = DT.getTemplateNameLoc();
- return buildSimpleTemplateName({BeginLoc, DT.getEndLoc()});
- }
case TypeLoc::Decltype: {
const auto DTL = TL.castAs<DecltypeTypeLoc>();
if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(
diff --git a/clang/test/AST/ast-dump-templates.cpp b/clang/test/AST/ast-dump-templates.cpp
index e43fe6b..18f62e4 100644
--- a/clang/test/AST/ast-dump-templates.cpp
+++ b/clang/test/AST/ast-dump-templates.cpp
@@ -175,7 +175,10 @@ namespace TestDependentMemberPointer {
// DUMP-NEXT: | `-BuiltinType {{.+}} 'int'
// DUMP-NEXT: `-TypeAliasDecl {{.+}} Z 'int U::template V<int>::*'{{$}}
// DUMP-NEXT: `-MemberPointerType {{.+}} 'int U::template V<int>::*' dependent
-// DUMP-NEXT: |-DependentTemplateSpecializationType {{.+}} 'U::template V<int>' dependent
+// DUMP-NEXT: |-TemplateSpecializationType {{.+}} 'U::template V<int>' dependent
+// DUMP-NEXT: | |-name: 'U::template V':'type-parameter-0-0::template V' dependent
+// DUMP-NEXT: | | `-NestedNameSpecifier TypeSpec 'U'
+// DUMP-NEXT: | `-TemplateArgument type 'int'
// DUMP-NEXT: `-BuiltinType {{.+}} 'int'
} // namespace TestDependentMemberPointer
@@ -237,6 +240,28 @@ namespace GH153540 {
// DUMP-NEXT: CXXConstructExpr {{.*}} <col:5, col:11> 'N::S<int>':'GH153540::N::S<int>' 'void (int)'
} // namespace GH153540
+namespace AliasDependentTemplateSpecializationType {
+ // DUMP-LABEL: NamespaceDecl {{.*}} AliasDependentTemplateSpecializationType{{$}}
+
+ template<template<class> class TT> using T1 = TT<int>;
+ template<class T> using T2 = T1<T::template X>;
+
+// DUMP: TypeAliasDecl {{.*}} T2 'T1<T::template X>':'T::template X<int>'
+// DUMP-NEXT: `-TemplateSpecializationType {{.*}} 'T1<T::template X>' sugar dependent alias
+// DUMP-NEXT: |-name: 'T1':'AliasDependentTemplateSpecializationType::T1' qualified
+// DUMP-NEXT: | `-TypeAliasTemplateDecl {{.*}} T1
+// DUMP-NEXT: |-TemplateArgument template 'T::template X':'type-parameter-0-0::template X' dependent
+// DUMP-NEXT: | `-NestedNameSpecifier TypeSpec 'T'
+// DUMP-NEXT: `-TemplateSpecializationType {{.*}} 'T::template X<int>' dependent
+// DUMP-NEXT: |-name: 'T::template X':'type-parameter-0-0::template X' subst index 0 final
+// DUMP-NEXT: | |-parameter: TemplateTemplateParmDecl {{.*}} depth 0 index 0 TT
+// DUMP-NEXT: | |-associated TypeAliasTemplate {{.*}} 'T1'
+// DUMP-NEXT: | `-replacement: 'T::template X':'type-parameter-0-0::template X' dependent
+// DUMP-NEXT: | `-NestedNameSpecifier TypeSpec 'T'
+// DUMP-NEXT: `-TemplateArgument type 'int'
+// DUMP-NEXT: `-BuiltinType {{.*}} 'int'
+} // namespace
+
// NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py
@@ -6646,8 +6671,8 @@ namespace GH153540 {
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6425,
-// JSON-NEXT: "line": 180,
+// JSON-NEXT: "offset": 6613,
+// JSON-NEXT: "line": 183,
// JSON-NEXT: "col": 1,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -6961,12 +6986,30 @@ namespace GH153540 {
// JSON-NEXT: "inner": [
// JSON-NEXT: {
// JSON-NEXT: "id": "0x{{.*}}",
-// JSON-NEXT: "kind": "DependentTemplateSpecializationType",
+// JSON-NEXT: "kind": "TemplateSpecializationType",
// JSON-NEXT: "type": {
// JSON-NEXT: "qualType": "U::template V<int>"
// JSON-NEXT: },
// JSON-NEXT: "isDependent": true,
-// JSON-NEXT: "isInstantiationDependent": true
+// JSON-NEXT: "isInstantiationDependent": true,
+// JSON-NEXT: "templateName": "U::template V",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "kind": "TemplateArgument",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: },
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "BuiltinType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: }
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
// JSON-NEXT: },
// JSON-NEXT: {
// JSON-NEXT: "id": "0x{{.*}}",
@@ -6989,20 +7032,20 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NamespaceDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6478,
-// JSON-NEXT: "line": 182,
+// JSON-NEXT: "offset": 6666,
+// JSON-NEXT: "line": 185,
// JSON-NEXT: "col": 11,
// JSON-NEXT: "tokLen": 19
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6468,
+// JSON-NEXT: "offset": 6656,
// JSON-NEXT: "col": 1,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9336,
-// JSON-NEXT: "line": 222,
+// JSON-NEXT: "offset": 9524,
+// JSON-NEXT: "line": 225,
// JSON-NEXT: "col": 1,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7013,19 +7056,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplateDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6601,
-// JSON-NEXT: "line": 184,
+// JSON-NEXT: "offset": 6789,
+// JSON-NEXT: "line": 187,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6563,
+// JSON-NEXT: "offset": 6751,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6612,
+// JSON-NEXT: "offset": 6800,
// JSON-NEXT: "col": 52,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7036,18 +7079,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6579,
+// JSON-NEXT: "offset": 6767,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6573,
+// JSON-NEXT: "offset": 6761,
// JSON-NEXT: "col": 13,
// JSON-NEXT: "tokLen": 5
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6579,
+// JSON-NEXT: "offset": 6767,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: }
@@ -7061,18 +7104,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6589,
+// JSON-NEXT: "offset": 6777,
// JSON-NEXT: "col": 29,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6584,
+// JSON-NEXT: "offset": 6772,
// JSON-NEXT: "col": 24,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6589,
+// JSON-NEXT: "offset": 6777,
// JSON-NEXT: "col": 29,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: }
@@ -7088,18 +7131,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6601,
+// JSON-NEXT: "offset": 6789,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6594,
+// JSON-NEXT: "offset": 6782,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6612,
+// JSON-NEXT: "offset": 6800,
// JSON-NEXT: "col": 52,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7162,18 +7205,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6601,
+// JSON-NEXT: "offset": 6789,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6594,
+// JSON-NEXT: "offset": 6782,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6601,
+// JSON-NEXT: "offset": 6789,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: }
@@ -7190,19 +7233,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplateDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6655,
-// JSON-NEXT: "line": 185,
+// JSON-NEXT: "offset": 6843,
+// JSON-NEXT: "line": 188,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6617,
+// JSON-NEXT: "offset": 6805,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6666,
+// JSON-NEXT: "offset": 6854,
// JSON-NEXT: "col": 52,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7213,18 +7256,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6633,
+// JSON-NEXT: "offset": 6821,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6627,
+// JSON-NEXT: "offset": 6815,
// JSON-NEXT: "col": 13,
// JSON-NEXT: "tokLen": 5
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6633,
+// JSON-NEXT: "offset": 6821,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: }
@@ -7238,18 +7281,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6643,
+// JSON-NEXT: "offset": 6831,
// JSON-NEXT: "col": 29,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6638,
+// JSON-NEXT: "offset": 6826,
// JSON-NEXT: "col": 24,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6643,
+// JSON-NEXT: "offset": 6831,
// JSON-NEXT: "col": 29,
// JSON-NEXT: "tokLen": 3
// JSON-NEXT: }
@@ -7265,18 +7308,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6655,
+// JSON-NEXT: "offset": 6843,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6648,
+// JSON-NEXT: "offset": 6836,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6666,
+// JSON-NEXT: "offset": 6854,
// JSON-NEXT: "col": 52,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7339,18 +7382,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6655,
+// JSON-NEXT: "offset": 6843,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6648,
+// JSON-NEXT: "offset": 6836,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6655,
+// JSON-NEXT: "offset": 6843,
// JSON-NEXT: "col": 41,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: }
@@ -7367,21 +7410,21 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplatePartialSpecializationDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6719,
-// JSON-NEXT: "line": 188,
+// JSON-NEXT: "offset": 6907,
+// JSON-NEXT: "line": 191,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6672,
-// JSON-NEXT: "line": 187,
+// JSON-NEXT: "offset": 6860,
+// JSON-NEXT: "line": 190,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6753,
-// JSON-NEXT: "line": 188,
+// JSON-NEXT: "offset": 6941,
+// JSON-NEXT: "line": 191,
// JSON-NEXT: "col": 44,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7488,12 +7531,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "DeclRefExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6743,
+// JSON-NEXT: "offset": 6931,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6743,
+// JSON-NEXT: "offset": 6931,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7527,12 +7570,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "DeclRefExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6748,
+// JSON-NEXT: "offset": 6936,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6748,
+// JSON-NEXT: "offset": 6936,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7556,19 +7599,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6688,
-// JSON-NEXT: "line": 187,
+// JSON-NEXT: "offset": 6876,
+// JSON-NEXT: "line": 190,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6682,
+// JSON-NEXT: "offset": 6870,
// JSON-NEXT: "col": 13,
// JSON-NEXT: "tokLen": 5
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6688,
+// JSON-NEXT: "offset": 6876,
// JSON-NEXT: "col": 19,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7583,18 +7626,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6697,
+// JSON-NEXT: "offset": 6885,
// JSON-NEXT: "col": 28,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6692,
+// JSON-NEXT: "offset": 6880,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6697,
+// JSON-NEXT: "offset": 6885,
// JSON-NEXT: "col": 28,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7611,18 +7654,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6706,
+// JSON-NEXT: "offset": 6894,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6701,
+// JSON-NEXT: "offset": 6889,
// JSON-NEXT: "col": 32,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6706,
+// JSON-NEXT: "offset": 6894,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7639,19 +7682,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 6719,
-// JSON-NEXT: "line": 188,
+// JSON-NEXT: "offset": 6907,
+// JSON-NEXT: "line": 191,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 6712,
+// JSON-NEXT: "offset": 6900,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 6719,
+// JSON-NEXT: "offset": 6907,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: }
@@ -7666,21 +7709,21 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplatePartialSpecializationDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 8035,
-// JSON-NEXT: "line": 206,
+// JSON-NEXT: "offset": 8223,
+// JSON-NEXT: "line": 209,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 7985,
-// JSON-NEXT: "line": 205,
+// JSON-NEXT: "offset": 8173,
+// JSON-NEXT: "line": 208,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8069,
-// JSON-NEXT: "line": 206,
+// JSON-NEXT: "offset": 8257,
+// JSON-NEXT: "line": 209,
// JSON-NEXT: "col": 44,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7787,12 +7830,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "DeclRefExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 8059,
+// JSON-NEXT: "offset": 8247,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8059,
+// JSON-NEXT: "offset": 8247,
// JSON-NEXT: "col": 34,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7826,12 +7869,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "DeclRefExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 8064,
+// JSON-NEXT: "offset": 8252,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8064,
+// JSON-NEXT: "offset": 8252,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7855,19 +7898,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 8004,
-// JSON-NEXT: "line": 205,
+// JSON-NEXT: "offset": 8192,
+// JSON-NEXT: "line": 208,
// JSON-NEXT: "col": 22,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 7995,
+// JSON-NEXT: "offset": 8183,
// JSON-NEXT: "col": 13,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8004,
+// JSON-NEXT: "offset": 8192,
// JSON-NEXT: "col": 22,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7882,18 +7925,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 8013,
+// JSON-NEXT: "offset": 8201,
// JSON-NEXT: "col": 31,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 8008,
+// JSON-NEXT: "offset": 8196,
// JSON-NEXT: "col": 26,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8013,
+// JSON-NEXT: "offset": 8201,
// JSON-NEXT: "col": 31,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7910,18 +7953,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NonTypeTemplateParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 8022,
+// JSON-NEXT: "offset": 8210,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 8017,
+// JSON-NEXT: "offset": 8205,
// JSON-NEXT: "col": 35,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8022,
+// JSON-NEXT: "offset": 8210,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 2
// JSON-NEXT: }
@@ -7938,19 +7981,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 8035,
-// JSON-NEXT: "line": 206,
+// JSON-NEXT: "offset": 8223,
+// JSON-NEXT: "line": 209,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 8028,
+// JSON-NEXT: "offset": 8216,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 8035,
+// JSON-NEXT: "offset": 8223,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: }
@@ -7967,20 +8010,20 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NamespaceDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9382,
-// JSON-NEXT: "line": 224,
+// JSON-NEXT: "offset": 9570,
+// JSON-NEXT: "line": 227,
// JSON-NEXT: "col": 11,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9372,
+// JSON-NEXT: "offset": 9560,
// JSON-NEXT: "col": 1,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9791,
-// JSON-NEXT: "line": 238,
+// JSON-NEXT: "offset": 9979,
+// JSON-NEXT: "line": 241,
// JSON-NEXT: "col": 1,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -7991,20 +8034,20 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "NamespaceDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9456,
-// JSON-NEXT: "line": 227,
+// JSON-NEXT: "offset": 9644,
+// JSON-NEXT: "line": 230,
// JSON-NEXT: "col": 13,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9446,
+// JSON-NEXT: "offset": 9634,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 9
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9507,
-// JSON-NEXT: "line": 229,
+// JSON-NEXT: "offset": 9695,
+// JSON-NEXT: "line": 232,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8015,19 +8058,19 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplateDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
-// JSON-NEXT: "line": 228,
+// JSON-NEXT: "offset": 9680,
+// JSON-NEXT: "line": 231,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9464,
+// JSON-NEXT: "offset": 9652,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9502,
+// JSON-NEXT: "offset": 9690,
// JSON-NEXT: "col": 43,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8038,18 +8081,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9473,
+// JSON-NEXT: "offset": 9661,
// JSON-NEXT: "col": 14,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8064,18 +8107,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9485,
+// JSON-NEXT: "offset": 9673,
// JSON-NEXT: "col": 26,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9502,
+// JSON-NEXT: "offset": 9690,
// JSON-NEXT: "col": 43,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8130,18 +8173,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9485,
+// JSON-NEXT: "offset": 9673,
// JSON-NEXT: "col": 26,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8154,18 +8197,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXConstructorDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8179,18 +8222,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8207,18 +8250,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ClassTemplateSpecializationDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9464,
+// JSON-NEXT: "offset": 9652,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9502,
+// JSON-NEXT: "offset": 9690,
// JSON-NEXT: "col": 43,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8286,18 +8329,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXRecordDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9485,
+// JSON-NEXT: "offset": 9673,
// JSON-NEXT: "col": 26,
// JSON-NEXT: "tokLen": 6
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8310,18 +8353,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXConstructorDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8337,18 +8380,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8363,18 +8406,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXConstructorDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8393,18 +8436,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8419,18 +8462,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXConstructorDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8449,18 +8492,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8475,18 +8518,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXDestructorDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8510,18 +8553,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "FunctionTemplateDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9464,
+// JSON-NEXT: "offset": 9652,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8533,18 +8576,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9473,
+// JSON-NEXT: "offset": 9661,
// JSON-NEXT: "col": 14,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8559,18 +8602,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXDeductionGuideDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8585,18 +8628,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8611,18 +8654,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXDeductionGuideDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9496,
+// JSON-NEXT: "offset": 9684,
// JSON-NEXT: "col": 37,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8653,18 +8696,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9499,
+// JSON-NEXT: "offset": 9687,
// JSON-NEXT: "col": 40,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9498,
+// JSON-NEXT: "offset": 9686,
// JSON-NEXT: "col": 39,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8681,18 +8724,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "FunctionTemplateDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9464,
+// JSON-NEXT: "offset": 9652,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8704,18 +8747,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "TemplateTypeParmDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9473,
+// JSON-NEXT: "offset": 9661,
// JSON-NEXT: "col": 14,
// JSON-NEXT: "tokLen": 8
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9482,
+// JSON-NEXT: "offset": 9670,
// JSON-NEXT: "col": 23,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8730,18 +8773,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "CXXDeductionGuideDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8756,18 +8799,18 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "ParmVarDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9492,
+// JSON-NEXT: "offset": 9680,
// JSON-NEXT: "col": 33,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8786,20 +8829,20 @@ namespace GH153540 {
// JSON-NEXT: "id": "0x{{.*}}",
// JSON-NEXT: "kind": "FunctionDecl",
// JSON-NEXT: "loc": {
-// JSON-NEXT: "offset": 9516,
-// JSON-NEXT: "line": 230,
+// JSON-NEXT: "offset": 9704,
+// JSON-NEXT: "line": 233,
// JSON-NEXT: "col": 8,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9511,
+// JSON-NEXT: "offset": 9699,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 4
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9537,
-// JSON-NEXT: "line": 232,
+// JSON-NEXT: "offset": 9725,
+// JSON-NEXT: "line": 235,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8815,14 +8858,14 @@ namespace GH153540 {
// JSON-NEXT: "kind": "CompoundStmt",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9520,
-// JSON-NEXT: "line": 230,
+// JSON-NEXT: "offset": 9708,
+// JSON-NEXT: "line": 233,
// JSON-NEXT: "col": 12,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9537,
-// JSON-NEXT: "line": 232,
+// JSON-NEXT: "offset": 9725,
+// JSON-NEXT: "line": 235,
// JSON-NEXT: "col": 3,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8833,13 +8876,13 @@ namespace GH153540 {
// JSON-NEXT: "kind": "CXXFunctionalCastExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9526,
-// JSON-NEXT: "line": 231,
+// JSON-NEXT: "offset": 9714,
+// JSON-NEXT: "line": 234,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9532,
+// JSON-NEXT: "offset": 9720,
// JSON-NEXT: "col": 11,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8864,12 +8907,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "CXXConstructExpr",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9526,
+// JSON-NEXT: "offset": 9714,
// JSON-NEXT: "col": 5,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9532,
+// JSON-NEXT: "offset": 9720,
// JSON-NEXT: "col": 11,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8890,12 +8933,12 @@ namespace GH153540 {
// JSON-NEXT: "kind": "IntegerLiteral",
// JSON-NEXT: "range": {
// JSON-NEXT: "begin": {
-// JSON-NEXT: "offset": 9531,
+// JSON-NEXT: "offset": 9719,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: },
// JSON-NEXT: "end": {
-// JSON-NEXT: "offset": 9531,
+// JSON-NEXT: "offset": 9719,
// JSON-NEXT: "col": 10,
// JSON-NEXT: "tokLen": 1
// JSON-NEXT: }
@@ -8915,6 +8958,282 @@ namespace GH153540 {
// JSON-NEXT: ]
// JSON-NEXT: }
// JSON-NEXT: ]
+// JSON-NEXT: },
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "NamespaceDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10014,
+// JSON-NEXT: "line": 243,
+// JSON-NEXT: "col": 11,
+// JSON-NEXT: "tokLen": 40
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10004,
+// JSON-NEXT: "col": 1,
+// JSON-NEXT: "tokLen": 9
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 11286,
+// JSON-NEXT: "line": 263,
+// JSON-NEXT: "col": 1,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "AliasDependentTemplateSpecializationType",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TypeAliasTemplateDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10179,
+// JSON-NEXT: "line": 246,
+// JSON-NEXT: "col": 38,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10144,
+// JSON-NEXT: "col": 3,
+// JSON-NEXT: "tokLen": 8
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10196,
+// JSON-NEXT: "col": 55,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "T1",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateTemplateParmDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10175,
+// JSON-NEXT: "col": 34,
+// JSON-NEXT: "tokLen": 2
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10153,
+// JSON-NEXT: "col": 12,
+// JSON-NEXT: "tokLen": 8
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10175,
+// JSON-NEXT: "col": 34,
+// JSON-NEXT: "tokLen": 2
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "TT",
+// JSON-NEXT: "depth": 0,
+// JSON-NEXT: "index": 0,
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateTypeParmDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10167,
+// JSON-NEXT: "col": 26,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10162,
+// JSON-NEXT: "col": 21,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10162,
+// JSON-NEXT: "col": 21,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "tagUsed": "class",
+// JSON-NEXT: "depth": 1,
+// JSON-NEXT: "index": 0
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: },
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TypeAliasDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10185,
+// JSON-NEXT: "col": 44,
+// JSON-NEXT: "tokLen": 2
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10179,
+// JSON-NEXT: "col": 38,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10196,
+// JSON-NEXT: "col": 55,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "T1",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "TT<int>"
+// JSON-NEXT: },
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateSpecializationType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "TT<int>"
+// JSON-NEXT: },
+// JSON-NEXT: "isDependent": true,
+// JSON-NEXT: "isInstantiationDependent": true,
+// JSON-NEXT: "templateName": "TT",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "kind": "TemplateArgument",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: },
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "BuiltinType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: }
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: },
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TypeAliasTemplateDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10219,
+// JSON-NEXT: "line": 247,
+// JSON-NEXT: "col": 21,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10201,
+// JSON-NEXT: "col": 3,
+// JSON-NEXT: "tokLen": 8
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10246,
+// JSON-NEXT: "col": 48,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "T2",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateTypeParmDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10216,
+// JSON-NEXT: "col": 18,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10210,
+// JSON-NEXT: "col": 12,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10216,
+// JSON-NEXT: "col": 18,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "T",
+// JSON-NEXT: "tagUsed": "class",
+// JSON-NEXT: "depth": 0,
+// JSON-NEXT: "index": 0
+// JSON-NEXT: },
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TypeAliasDecl",
+// JSON-NEXT: "loc": {
+// JSON-NEXT: "offset": 10225,
+// JSON-NEXT: "col": 27,
+// JSON-NEXT: "tokLen": 2
+// JSON-NEXT: },
+// JSON-NEXT: "range": {
+// JSON-NEXT: "begin": {
+// JSON-NEXT: "offset": 10219,
+// JSON-NEXT: "col": 21,
+// JSON-NEXT: "tokLen": 5
+// JSON-NEXT: },
+// JSON-NEXT: "end": {
+// JSON-NEXT: "offset": 10246,
+// JSON-NEXT: "col": 48,
+// JSON-NEXT: "tokLen": 1
+// JSON-NEXT: }
+// JSON-NEXT: },
+// JSON-NEXT: "name": "T2",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "desugaredQualType": "T::template X<int>",
+// JSON-NEXT: "qualType": "T1<T::template X>"
+// JSON-NEXT: },
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateSpecializationType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "T1<T::template X>"
+// JSON-NEXT: },
+// JSON-NEXT: "isDependent": true,
+// JSON-NEXT: "isInstantiationDependent": true,
+// JSON-NEXT: "isAlias": true,
+// JSON-NEXT: "templateName": "T1",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "kind": "TemplateArgument"
+// JSON-NEXT: },
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "TemplateSpecializationType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "T::template X<int>"
+// JSON-NEXT: },
+// JSON-NEXT: "isDependent": true,
+// JSON-NEXT: "isInstantiationDependent": true,
+// JSON-NEXT: "templateName": "T::template X",
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "kind": "TemplateArgument",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: },
+// JSON-NEXT: "inner": [
+// JSON-NEXT: {
+// JSON-NEXT: "id": "0x{{.*}}",
+// JSON-NEXT: "kind": "BuiltinType",
+// JSON-NEXT: "type": {
+// JSON-NEXT: "qualType": "int"
+// JSON-NEXT: }
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
+// JSON-NEXT: }
+// JSON-NEXT: ]
// JSON-NEXT: }
// JSON-NEXT: ]
// JSON-NEXT: }
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
index f39822e..75eead0 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
@@ -438,6 +438,34 @@ void use_const_local() {
} // namespace const_global
+namespace var_decl_ref_singleton {
+
+static Class initSomeObject() { return nil; }
+static Class (*getSomeObjectClassSingleton)() = initSomeObject;
+
+bool foo(NSString *obj) {
+ return [obj isKindOfClass:getSomeObjectClassSingleton()];
+}
+
+class Bar {
+public:
+ Class someObject();
+ static Class staticSomeObject();
+};
+typedef Class (Bar::*SomeObjectSingleton)();
+
+bool bar(NSObject *obj, Bar *bar, SomeObjectSingleton someObjSingleton) {
+ return [obj isKindOfClass:(bar->*someObjSingleton)()];
+ // expected-warning@-1{{Call argument for parameter 'aClass' is unretained and unsafe}}
+}
+
+bool baz(NSObject *obj) {
+ Class (*someObjectSingleton)() = Bar::staticSomeObject;
+ return [obj isKindOfClass:someObjectSingleton()];
+}
+
+} // namespace var_decl_ref_singleton
+
namespace ns_retained_return_value {
NSString *provideNS() NS_RETURNS_RETAINED;
diff --git a/clang/test/Analysis/ctu-import-type-decl-definition.c b/clang/test/Analysis/ctu-import-type-decl-definition.c
index f74920697..10910e0 100644
--- a/clang/test/Analysis/ctu-import-type-decl-definition.c
+++ b/clang/test/Analysis/ctu-import-type-decl-definition.c
@@ -2,26 +2,28 @@
// RUN: mkdir -p %t
// RUN: split-file %s %t
-// RUN: %clang_cc1 -emit-pch -o %t/import.c.ast %t/import.c
+// RUN: %clang_cc1 -x c -emit-pch -o %t/import.c.ast %t/import.c
-// RUN: %clang_extdef_map -- -x c %t/import.c >> %t/externalDefMap.tmp.txt
-// RUN: sed 's/$/.ast/' %t/externalDefMap.tmp.txt >> %t/externalDefMap.txt
+// RUN: %clang_extdef_map %t/import.c -- -c -x c > %t/externalDefMap.tmp.txt
+// RUN: sed 's/$/.ast/' %t/externalDefMap.tmp.txt > %t/externalDefMap.txt
// RUN: %clang_cc1 -analyze \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \
// RUN: -analyzer-config display-ctu-progress=true \
// RUN: -analyzer-config ctu-dir=%t \
-// RUN: -verify %t/main.c
+// RUN: -verify %t/main.c 2>&1 | FileCheck %s
//--- main.c
// expected-no-diagnostics
+// CHECK: CTU loaded AST file:
typedef struct X_s X_t;
-unsigned long f_import(struct X_s *xPtr);
-static void freeWriteFileResources(struct X_s *xPtr) {
+long f_import(struct X_s *xPtr);
+
+static void f_main(struct X_s *xPtr) {
f_import(xPtr);
}
@@ -36,7 +38,7 @@ struct X_s {
Y_t y;
};
-unsigned long f_import(struct X_s *xPtr) {
+long f_import(struct X_s *xPtr) {
if (xPtr != 0) {
}
return 0;
diff --git a/clang/test/C/C11/n1285_1.c b/clang/test/C/C11/n1285_1.c
index 25b68e3..345ec94 100644
--- a/clang/test/C/C11/n1285_1.c
+++ b/clang/test/C/C11/n1285_1.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple=x86_64 -std=c99 -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -triple=x86_64 -std=c11 -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -triple=x86_64 -std=c11 -O2 -disable-llvm-passes -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=C11-O2
@@ -32,9 +32,9 @@ struct X f(void);
// C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]]
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[TBAA2:![0-9]+]]
-// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2:![0-9]+]]
+// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7:![0-9]+]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[P]]) #[[ATTR5]]
// C11-O2-NEXT: ret i32 [[TMP1]]
//
@@ -91,18 +91,18 @@ int func_return(void) {
// C11-O2: [[COND_END]]:
// C11-O2-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A1]], i64 0, i64 0
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]]
// C11-O2-NEXT: call void @llvm.lifetime.start.p0(ptr [[Q]]) #[[ATTR5]]
// C11-O2-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTCOMPOUNDLITERAL]], i8 0, i64 20, i1 false)
// C11-O2-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0
// C11-O2-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [5 x i32], ptr [[A3]], i64 0, i64 0
-// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]]
-// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]]
+// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA7]]
// C11-O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP3]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[Q]]) #[[ATTR5]]
// C11-O2-NEXT: ret i32 [[ADD]]
@@ -138,10 +138,10 @@ int ternary(void) {
// C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9:![0-9]+]]
// C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]]
-// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]]
+// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]]
// C11-O2-NEXT: ret i32 [[TMP1]]
//
@@ -175,10 +175,10 @@ int comma(void) {
// C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]]
// C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]]
-// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]]
+// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]]
// C11-O2-NEXT: ret i32 [[TMP1]]
//
@@ -217,10 +217,10 @@ int cast(void) {
// C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]]
// C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0
// C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]]
+// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]]
-// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]]
-// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]]
+// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]]
+// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[S]]) #[[ATTR5]]
// C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]]
// C11-O2-NEXT: ret i32 [[TMP1]]
@@ -232,12 +232,12 @@ int assign(void) {
return *p;
}
//.
-// C11-O2: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// C11-O2: [[INTPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// C11-O2: [[META3]] = !{!"p1 int", [[META4:![0-9]+]], i64 0}
// C11-O2: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
// C11-O2: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// C11-O2: [[META6]] = !{!"Simple C/C++ TBAA"}
-// C11-O2: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// C11-O2: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
// C11-O2: [[META8]] = !{!"int", [[META5]], i64 0}
// C11-O2: [[TBAA_STRUCT9]] = !{i64 0, i64 20, [[META10:![0-9]+]]}
// C11-O2: [[META10]] = !{[[META5]], [[META5]], i64 0}
diff --git a/clang/test/C/C2y/n3254.c b/clang/test/C/C2y/n3254.c
index e114735..9f8c477 100644
--- a/clang/test/C/C2y/n3254.c
+++ b/clang/test/C/C2y/n3254.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple=x86_64 -std=c2y %s -emit-llvm -o - | FileCheck %s
/* WG14 N3254: Yes
diff --git a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c
index 8b4b31a..7c0f6278 100644
--- a/clang/test/CodeGen/AArch64/fp8-init-list.c
+++ b/clang/test/CodeGen/AArch64/fp8-init-list.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
@@ -34,25 +34,25 @@ struct S s;
// CHECK-LABEL: define dso_local void @f(
// CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8(
// CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]]
// CHECK-CXX-NEXT: ret void
//
void f(__mfp8 x) {
s = (struct S){x};
}
//.
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-CXX: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-CXX: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK-CXX: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0}
// CHECK-CXX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-CXX: [[META5]] = !{!"Simple C++ TBAA"}
diff --git a/clang/test/CodeGen/AArch64/ls64-inline-asm.c b/clang/test/CodeGen/AArch64/ls64-inline-asm.c
index 8aa0684..1d217eb 100644
--- a/clang/test/CodeGen/AArch64/ls64-inline-asm.c
+++ b/clang/test/CodeGen/AArch64/ls64-inline-asm.c
@@ -1,12 +1,13 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64 -target-feature +ls64 -O1 -emit-llvm -x c %s -o - | FileCheck %s
struct foo { unsigned long long x[8]; };
-// CHECK-LABEL: @load(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]]
-// CHECK-NEXT: store i512 [[TMP0]], ptr [[OUTPUT:%.*]], align 8
+// CHECK-LABEL: define dso_local void @load(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 64)) [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT: store i512 [[TMP0]], ptr [[OUTPUT]], align 8
// CHECK-NEXT: ret void
//
void load(struct foo *output, void *addr)
@@ -14,10 +15,11 @@ void load(struct foo *output, void *addr)
__asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");
}
-// CHECK-LABEL: @store(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i512, ptr [[INPUT:%.*]], align 8
-// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]]
+// CHECK-LABEL: define dso_local void @store(
+// CHECK-SAME: ptr noundef readonly captures(none) [[INPUT:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i512, ptr [[INPUT]], align 8
+// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]]
// CHECK-NEXT: ret void
//
void store(const struct foo *input, void *addr)
@@ -25,30 +27,31 @@ void store(const struct foo *input, void *addr)
__asm__ volatile ("st64b %0,[%1]" : : "r" (*input), "r" (addr) : "memory" );
}
-// CHECK-LABEL: @store2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-LABEL: define dso_local void @store2(
+// CHECK-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[TMP1]] to i64
// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 16
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[TMP2]] to i64
// CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 64
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV8:%.*]] = sext i32 [[TMP3]] to i64
// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 100
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV11:%.*]] = sext i32 [[TMP4]] to i64
// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 144
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV14:%.*]] = sext i32 [[TMP5]] to i64
// CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 196
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV17:%.*]] = sext i32 [[TMP6]] to i64
// CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 256
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[CONV20:%.*]] = sext i32 [[TMP7]] to i64
// CHECK-NEXT: [[S_SROA_10_0_INSERT_EXT:%.*]] = zext i64 [[CONV20]] to i512
// CHECK-NEXT: [[S_SROA_10_0_INSERT_SHIFT:%.*]] = shl nuw i512 [[S_SROA_10_0_INSERT_EXT]], 448
@@ -72,7 +75,7 @@ void store(const struct foo *input, void *addr)
// CHECK-NEXT: [[S_SROA_0_0_INSERT_EXT:%.*]] = zext i64 [[CONV]] to i512
// CHECK-NEXT: [[S_SROA_0_0_INSERT_MASK:%.*]] = or disjoint i512 [[S_SROA_4_0_INSERT_MASK]], [[S_SROA_4_0_INSERT_SHIFT]]
// CHECK-NEXT: [[S_SROA_0_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_0_0_INSERT_MASK]], [[S_SROA_0_0_INSERT_EXT]]
-// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META8:![0-9]+]]
+// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META8:![0-9]+]]
// CHECK-NEXT: ret void
//
void store2(int *in, void *addr)
@@ -80,3 +83,12 @@ void store2(int *in, void *addr)
struct foo s = { in[0], in[1], in[4], in[16], in[25], in[36], in[49], in[64] };
__asm__ volatile ("st64b %0,[%1]" : : "r" (s), "r" (addr) : "memory" );
}
+//.
+// CHECK: [[META2]] = !{i64 789}
+// CHECK: [[META3]] = !{i64 1368}
+// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META8]] = !{i64 5992}
+//.
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c
index c0b3e1a..049c174 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c
@@ -6,49 +6,55 @@
#include <arm_sme.h>
-// CHECK-C-LABEL: define dso_local i64 @test_svcntsb(
+// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsb(
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-C-NEXT: ret i64 [[TMP0]]
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+// CHECK-C-NEXT: ret i64 [[MULSVL]]
//
-// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntsbv(
+// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntsbv(
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-CXX-NEXT: ret i64 [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
//
uint64_t test_svcntsb() {
return svcntsb();
}
-// CHECK-C-LABEL: define dso_local i64 @test_svcntsh(
+// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsh(
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh()
-// CHECK-C-NEXT: ret i64 [[TMP0]]
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+// CHECK-C-NEXT: ret i64 [[MULSVL]]
//
-// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntshv(
+// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntshv(
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh()
-// CHECK-CXX-NEXT: ret i64 [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
//
uint64_t test_svcntsh() {
return svcntsh();
}
-// CHECK-C-LABEL: define dso_local i64 @test_svcntsw(
+// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsw(
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw()
-// CHECK-C-NEXT: ret i64 [[TMP0]]
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1
+// CHECK-C-NEXT: ret i64 [[MULSVL]]
//
-// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntswv(
+// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntswv(
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw()
-// CHECK-CXX-NEXT: ret i64 [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
+// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1
+// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
//
uint64_t test_svcntsw() {
return svcntsw();
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
index 9a8ce22..dd094e5 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
@@ -1,6386 +1,7130 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
#include <lasxintrin.h>
-// CHECK-LABEL: @xvsll_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); }
-// CHECK-LABEL: @xvsll_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); }
-// CHECK-LABEL: @xvsll_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); }
-// CHECK-LABEL: @xvsll_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); }
-// CHECK-LABEL: @xvslli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); }
-// CHECK-LABEL: @xvslli_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); }
-// CHECK-LABEL: @xvslli_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); }
-// CHECK-LABEL: @xvslli_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); }
-// CHECK-LABEL: @xvsra_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); }
-// CHECK-LABEL: @xvsra_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); }
-// CHECK-LABEL: @xvsra_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); }
-// CHECK-LABEL: @xvsra_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); }
-// CHECK-LABEL: @xvsrai_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); }
-// CHECK-LABEL: @xvsrai_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); }
-// CHECK-LABEL: @xvsrai_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); }
-// CHECK-LABEL: @xvsrai_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); }
-// CHECK-LABEL: @xvsrar_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); }
-// CHECK-LABEL: @xvsrar_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); }
-// CHECK-LABEL: @xvsrar_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); }
-// CHECK-LABEL: @xvsrar_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); }
-// CHECK-LABEL: @xvsrari_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); }
-// CHECK-LABEL: @xvsrari_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); }
-// CHECK-LABEL: @xvsrari_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); }
-// CHECK-LABEL: @xvsrari_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); }
-// CHECK-LABEL: @xvsrl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); }
-// CHECK-LABEL: @xvsrl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); }
-// CHECK-LABEL: @xvsrl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); }
-// CHECK-LABEL: @xvsrl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); }
-// CHECK-LABEL: @xvsrli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); }
-// CHECK-LABEL: @xvsrli_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); }
-// CHECK-LABEL: @xvsrli_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); }
-// CHECK-LABEL: @xvsrli_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); }
-// CHECK-LABEL: @xvsrlr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); }
-// CHECK-LABEL: @xvsrlr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); }
-// CHECK-LABEL: @xvsrlr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); }
-// CHECK-LABEL: @xvsrlr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); }
-// CHECK-LABEL: @xvsrlri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); }
-// CHECK-LABEL: @xvsrlri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); }
-// CHECK-LABEL: @xvsrlri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); }
-// CHECK-LABEL: @xvsrlri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); }
-// CHECK-LABEL: @xvbitclr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); }
-// CHECK-LABEL: @xvbitclr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); }
-// CHECK-LABEL: @xvbitclr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); }
-// CHECK-LABEL: @xvbitclr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); }
-// CHECK-LABEL: @xvbitclri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); }
-// CHECK-LABEL: @xvbitclri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); }
-// CHECK-LABEL: @xvbitclri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); }
-// CHECK-LABEL: @xvbitclri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); }
-// CHECK-LABEL: @xvbitset_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); }
-// CHECK-LABEL: @xvbitset_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); }
-// CHECK-LABEL: @xvbitset_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); }
-// CHECK-LABEL: @xvbitset_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); }
-// CHECK-LABEL: @xvbitseti_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); }
-// CHECK-LABEL: @xvbitseti_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); }
-// CHECK-LABEL: @xvbitseti_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); }
-// CHECK-LABEL: @xvbitseti_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); }
-// CHECK-LABEL: @xvbitrev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); }
-// CHECK-LABEL: @xvbitrev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); }
-// CHECK-LABEL: @xvbitrev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); }
-// CHECK-LABEL: @xvbitrev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); }
-// CHECK-LABEL: @xvbitrevi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); }
-// CHECK-LABEL: @xvadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); }
-// CHECK-LABEL: @xvadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); }
-// CHECK-LABEL: @xvadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); }
-// CHECK-LABEL: @xvadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); }
-// CHECK-LABEL: @xvaddi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); }
-// CHECK-LABEL: @xvaddi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); }
-// CHECK-LABEL: @xvaddi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); }
-// CHECK-LABEL: @xvaddi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); }
-// CHECK-LABEL: @xvsub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); }
-// CHECK-LABEL: @xvsub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); }
-// CHECK-LABEL: @xvsub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); }
-// CHECK-LABEL: @xvsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); }
-// CHECK-LABEL: @xvsubi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); }
-// CHECK-LABEL: @xvsubi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); }
-// CHECK-LABEL: @xvsubi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); }
-// CHECK-LABEL: @xvsubi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); }
-// CHECK-LABEL: @xvmax_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); }
-// CHECK-LABEL: @xvmax_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); }
-// CHECK-LABEL: @xvmax_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); }
-// CHECK-LABEL: @xvmax_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); }
-// CHECK-LABEL: @xvmaxi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); }
-// CHECK-LABEL: @xvmaxi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); }
-// CHECK-LABEL: @xvmaxi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); }
-// CHECK-LABEL: @xvmaxi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); }
-// CHECK-LABEL: @xvmax_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); }
-// CHECK-LABEL: @xvmax_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); }
-// CHECK-LABEL: @xvmax_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); }
-// CHECK-LABEL: @xvmax_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); }
-// CHECK-LABEL: @xvmaxi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); }
-// CHECK-LABEL: @xvmin_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); }
-// CHECK-LABEL: @xvmin_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); }
-// CHECK-LABEL: @xvmin_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); }
-// CHECK-LABEL: @xvmin_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
-// CHECK-LABEL: @xvmini_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
-// CHECK-LABEL: @xvmini_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
-// CHECK-LABEL: @xvmini_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
-// CHECK-LABEL: @xvmini_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
-// CHECK-LABEL: @xvmin_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
-// CHECK-LABEL: @xvmin_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
-// CHECK-LABEL: @xvmin_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
-// CHECK-LABEL: @xvmin_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
-// CHECK-LABEL: @xvmini_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
-// CHECK-LABEL: @xvmini_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
-// CHECK-LABEL: @xvmini_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
-// CHECK-LABEL: @xvmini_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
-// CHECK-LABEL: @xvseq_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
-// CHECK-LABEL: @xvseq_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
-// CHECK-LABEL: @xvseq_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
-// CHECK-LABEL: @xvseq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
-// CHECK-LABEL: @xvseqi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
-// CHECK-LABEL: @xvseqi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
-// CHECK-LABEL: @xvseqi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
-// CHECK-LABEL: @xvseqi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
-// CHECK-LABEL: @xvslt_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
-// CHECK-LABEL: @xvslt_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
-// CHECK-LABEL: @xvslt_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
-// CHECK-LABEL: @xvslt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
-// CHECK-LABEL: @xvslti_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
-// CHECK-LABEL: @xvslti_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
-// CHECK-LABEL: @xvslti_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
-// CHECK-LABEL: @xvslti_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
-// CHECK-LABEL: @xvslt_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
-// CHECK-LABEL: @xvslt_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
-// CHECK-LABEL: @xvslt_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
-// CHECK-LABEL: @xvslt_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
-// CHECK-LABEL: @xvslti_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
-// CHECK-LABEL: @xvslti_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
-// CHECK-LABEL: @xvslti_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
-// CHECK-LABEL: @xvslti_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
-// CHECK-LABEL: @xvsle_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
-// CHECK-LABEL: @xvsle_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
-// CHECK-LABEL: @xvsle_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
-// CHECK-LABEL: @xvsle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
-// CHECK-LABEL: @xvslei_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
-// CHECK-LABEL: @xvslei_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
-// CHECK-LABEL: @xvslei_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
-// CHECK-LABEL: @xvslei_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
-// CHECK-LABEL: @xvsle_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
-// CHECK-LABEL: @xvsle_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
-// CHECK-LABEL: @xvsle_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
-// CHECK-LABEL: @xvsle_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
-// CHECK-LABEL: @xvslei_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
-// CHECK-LABEL: @xvslei_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
-// CHECK-LABEL: @xvslei_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
-// CHECK-LABEL: @xvslei_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
-// CHECK-LABEL: @xvsat_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
-// CHECK-LABEL: @xvsat_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
-// CHECK-LABEL: @xvsat_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
-// CHECK-LABEL: @xvsat_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
-// CHECK-LABEL: @xvsat_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
-// CHECK-LABEL: @xvsat_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
-// CHECK-LABEL: @xvsat_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
-// CHECK-LABEL: @xvsat_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
-// CHECK-LABEL: @xvadda_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
-// CHECK-LABEL: @xvadda_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
-// CHECK-LABEL: @xvadda_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
-// CHECK-LABEL: @xvadda_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
-// CHECK-LABEL: @xvsadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
-// CHECK-LABEL: @xvsadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
-// CHECK-LABEL: @xvsadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
-// CHECK-LABEL: @xvsadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
-// CHECK-LABEL: @xvsadd_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
-// CHECK-LABEL: @xvsadd_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
-// CHECK-LABEL: @xvsadd_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
-// CHECK-LABEL: @xvsadd_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
-// CHECK-LABEL: @xvavg_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
-// CHECK-LABEL: @xvavg_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
-// CHECK-LABEL: @xvavg_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
-// CHECK-LABEL: @xvavg_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
-// CHECK-LABEL: @xvavg_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
-// CHECK-LABEL: @xvavg_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
-// CHECK-LABEL: @xvavg_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
-// CHECK-LABEL: @xvavg_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
-// CHECK-LABEL: @xvavgr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
-// CHECK-LABEL: @xvavgr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
-// CHECK-LABEL: @xvavgr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
-// CHECK-LABEL: @xvavgr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
-// CHECK-LABEL: @xvavgr_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
-// CHECK-LABEL: @xvavgr_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
-// CHECK-LABEL: @xvavgr_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
-// CHECK-LABEL: @xvavgr_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
-// CHECK-LABEL: @xvssub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
-// CHECK-LABEL: @xvssub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
-// CHECK-LABEL: @xvssub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
-// CHECK-LABEL: @xvssub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
-// CHECK-LABEL: @xvssub_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
-// CHECK-LABEL: @xvssub_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
-// CHECK-LABEL: @xvssub_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
-// CHECK-LABEL: @xvssub_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
-// CHECK-LABEL: @xvabsd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
-// CHECK-LABEL: @xvabsd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
-// CHECK-LABEL: @xvabsd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
-// CHECK-LABEL: @xvabsd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
-// CHECK-LABEL: @xvabsd_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
-// CHECK-LABEL: @xvabsd_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
-// CHECK-LABEL: @xvabsd_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
-// CHECK-LABEL: @xvabsd_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
-// CHECK-LABEL: @xvmul_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
-// CHECK-LABEL: @xvmul_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
-// CHECK-LABEL: @xvmul_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
-// CHECK-LABEL: @xvmul_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
-// CHECK-LABEL: @xvmadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvdiv_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
-// CHECK-LABEL: @xvdiv_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
-// CHECK-LABEL: @xvdiv_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
-// CHECK-LABEL: @xvdiv_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
-// CHECK-LABEL: @xvdiv_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
-// CHECK-LABEL: @xvdiv_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
-// CHECK-LABEL: @xvdiv_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
-// CHECK-LABEL: @xvdiv_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
-// CHECK-LABEL: @xvhaddw_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
-// CHECK-LABEL: @xvhaddw_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
-// CHECK-LABEL: @xvhaddw_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
-// CHECK-LABEL: @xvhaddw_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
-// CHECK-LABEL: @xvhaddw_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
-// CHECK-LABEL: @xvhaddw_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
-// CHECK-LABEL: @xvhsubw_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
-// CHECK-LABEL: @xvhsubw_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
-// CHECK-LABEL: @xvhsubw_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); }
-// CHECK-LABEL: @xvmod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); }
-// CHECK-LABEL: @xvmod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); }
-// CHECK-LABEL: @xvmod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); }
-// CHECK-LABEL: @xvmod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); }
-// CHECK-LABEL: @xvmod_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); }
-// CHECK-LABEL: @xvmod_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); }
-// CHECK-LABEL: @xvmod_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); }
-// CHECK-LABEL: @xvmod_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); }
-// CHECK-LABEL: @xvrepl128vei_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); }
-// CHECK-LABEL: @xvpickev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); }
-// CHECK-LABEL: @xvpickev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); }
-// CHECK-LABEL: @xvpickev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); }
-// CHECK-LABEL: @xvpickev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); }
-// CHECK-LABEL: @xvpickod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); }
-// CHECK-LABEL: @xvpickod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); }
-// CHECK-LABEL: @xvpickod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); }
-// CHECK-LABEL: @xvpickod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); }
-// CHECK-LABEL: @xvilvh_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); }
-// CHECK-LABEL: @xvilvh_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); }
-// CHECK-LABEL: @xvilvh_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); }
-// CHECK-LABEL: @xvilvh_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); }
-// CHECK-LABEL: @xvilvl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); }
-// CHECK-LABEL: @xvilvl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); }
-// CHECK-LABEL: @xvilvl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
-// CHECK-LABEL: @xvilvl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
-// CHECK-LABEL: @xvpackev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
-// CHECK-LABEL: @xvpackev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
-// CHECK-LABEL: @xvpackev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
-// CHECK-LABEL: @xvpackev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
-// CHECK-LABEL: @xvpackod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
-// CHECK-LABEL: @xvpackod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
-// CHECK-LABEL: @xvpackod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
-// CHECK-LABEL: @xvpackod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
-// CHECK-LABEL: @xvshuf_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
-// CHECK-LABEL: @xvand_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvand_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
-// CHECK-LABEL: @xvandi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvandi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
-// CHECK-LABEL: @xvor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
-// CHECK-LABEL: @xvori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
-// CHECK-LABEL: @xvnor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvnor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
-// CHECK-LABEL: @xvnori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvnori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
-// CHECK-LABEL: @xvxor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvxor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
-// CHECK-LABEL: @xvxori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvxori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
-// CHECK-LABEL: @xvbitsel_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitsel_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
-// CHECK-LABEL: @xvbitseli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
-// CHECK-LABEL: @xvshuf4i_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
-// CHECK-LABEL: @xvshuf4i_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
-// CHECK-LABEL: @xvshuf4i_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
-// CHECK-LABEL: @xvreplgr2vr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
-// CHECK-LABEL: @xvreplgr2vr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
-// CHECK-LABEL: @xvreplgr2vr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
-// CHECK-LABEL: @xvreplgr2vr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
-// CHECK-LABEL: @xvpcnt_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
-// CHECK-LABEL: @xvpcnt_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
-// CHECK-LABEL: @xvpcnt_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
-// CHECK-LABEL: @xvpcnt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
-// CHECK-LABEL: @xvclo_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
-// CHECK-LABEL: @xvclo_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
-// CHECK-LABEL: @xvclo_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
-// CHECK-LABEL: @xvclo_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
-// CHECK-LABEL: @xvclz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
-// CHECK-LABEL: @xvclz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
-// CHECK-LABEL: @xvclz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
-// CHECK-LABEL: @xvclz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
-// CHECK-LABEL: @xvfadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
-// CHECK-LABEL: @xvfadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
-// CHECK-LABEL: @xvfsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
-// CHECK-LABEL: @xvfsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
-// CHECK-LABEL: @xvfmul_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmul_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
-// CHECK-LABEL: @xvfmul_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmul_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); }
-// CHECK-LABEL: @xvfdiv_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfdiv_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); }
-// CHECK-LABEL: @xvfdiv_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfdiv_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); }
-// CHECK-LABEL: @xvfcvt_h_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvt_h_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); }
-// CHECK-LABEL: @xvfcvt_s_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvt_s_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); }
-// CHECK-LABEL: @xvfmin_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmin_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); }
-// CHECK-LABEL: @xvfmin_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmin_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); }
-// CHECK-LABEL: @xvfmina_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmina_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); }
-// CHECK-LABEL: @xvfmina_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmina_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); }
-// CHECK-LABEL: @xvfmax_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmax_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); }
-// CHECK-LABEL: @xvfmax_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmax_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); }
-// CHECK-LABEL: @xvfmaxa_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmaxa_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); }
-// CHECK-LABEL: @xvfmaxa_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmaxa_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); }
-// CHECK-LABEL: @xvfclass_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfclass_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); }
-// CHECK-LABEL: @xvfclass_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfclass_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); }
-// CHECK-LABEL: @xvfsqrt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsqrt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); }
-// CHECK-LABEL: @xvfsqrt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsqrt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); }
-// CHECK-LABEL: @xvfrecip_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrecip_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); }
-// CHECK-LABEL: @xvfrecip_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrecip_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); }
-// CHECK-LABEL: @xvfrint_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrint_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); }
-// CHECK-LABEL: @xvfrint_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrint_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); }
-// CHECK-LABEL: @xvfrsqrt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrsqrt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); }
-// CHECK-LABEL: @xvfrsqrt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrsqrt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
-// CHECK-LABEL: @xvflogb_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvflogb_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
-// CHECK-LABEL: @xvflogb_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvflogb_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
-// CHECK-LABEL: @xvfcvth_s_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvth_s_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); }
-// CHECK-LABEL: @xvfcvth_d_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvth_d_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); }
-// CHECK-LABEL: @xvfcvtl_s_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvtl_s_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); }
-// CHECK-LABEL: @xvfcvtl_d_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvtl_d_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); }
-// CHECK-LABEL: @xvftint_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); }
-// CHECK-LABEL: @xvftint_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); }
-// CHECK-LABEL: @xvftint_wu_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_wu_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); }
-// CHECK-LABEL: @xvftint_lu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_lu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); }
-// CHECK-LABEL: @xvftintrz_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); }
-// CHECK-LABEL: @xvftintrz_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); }
-// CHECK-LABEL: @xvftintrz_wu_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_wu_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); }
-// CHECK-LABEL: @xvftintrz_lu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_lu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); }
-// CHECK-LABEL: @xvffint_s_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); }
-// CHECK-LABEL: @xvffint_d_l(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_d_l(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); }
-// CHECK-LABEL: @xvffint_s_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); }
-// CHECK-LABEL: @xvffint_d_lu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_d_lu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); }
-// CHECK-LABEL: @xvreplve_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); }
-// CHECK-LABEL: @xvreplve_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); }
-// CHECK-LABEL: @xvreplve_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); }
-// CHECK-LABEL: @xvreplve_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); }
-// CHECK-LABEL: @xvpermi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); }
-// CHECK-LABEL: @xvandn_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvandn_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); }
-// CHECK-LABEL: @xvneg_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); }
-// CHECK-LABEL: @xvneg_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); }
-// CHECK-LABEL: @xvneg_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); }
-// CHECK-LABEL: @xvneg_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); }
-// CHECK-LABEL: @xvmuh_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); }
-// CHECK-LABEL: @xvmuh_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); }
-// CHECK-LABEL: @xvmuh_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); }
-// CHECK-LABEL: @xvmuh_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); }
-// CHECK-LABEL: @xvmuh_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); }
-// CHECK-LABEL: @xvmuh_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); }
-// CHECK-LABEL: @xvmuh_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); }
-// CHECK-LABEL: @xvmuh_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); }
-// CHECK-LABEL: @xvsllwil_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); }
-// CHECK-LABEL: @xvsllwil_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); }
-// CHECK-LABEL: @xvsllwil_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); }
-// CHECK-LABEL: @xvsllwil_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); }
-// CHECK-LABEL: @xvsllwil_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); }
-// CHECK-LABEL: @xvsllwil_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); }
-// CHECK-LABEL: @xvsran_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); }
-// CHECK-LABEL: @xvsran_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); }
-// CHECK-LABEL: @xvsran_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); }
-// CHECK-LABEL: @xvssran_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); }
-// CHECK-LABEL: @xvssran_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); }
-// CHECK-LABEL: @xvssran_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); }
-// CHECK-LABEL: @xvssran_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssran_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssran_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrarn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrarn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrarn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrarn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrarn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrarn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrarn_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrarn_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrarn_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrln_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrln_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrln_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrln_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrln_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrln_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); }
-// CHECK-LABEL: @xvfrstpi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstpi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); }
-// CHECK-LABEL: @xvfrstpi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstpi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); }
-// CHECK-LABEL: @xvfrstp_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstp_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); }
-// CHECK-LABEL: @xvfrstp_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstp_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf4i_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); }
-// CHECK-LABEL: @xvbsrl_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbsrl_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); }
-// CHECK-LABEL: @xvbsll_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbsll_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); }
-// CHECK-LABEL: @xvextrins_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); }
-// CHECK-LABEL: @xvmskltz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); }
-// CHECK-LABEL: @xvmskltz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); }
-// CHECK-LABEL: @xvmskltz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); }
-// CHECK-LABEL: @xvmskltz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); }
-// CHECK-LABEL: @xvsigncov_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); }
-// CHECK-LABEL: @xvsigncov_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); }
-// CHECK-LABEL: @xvsigncov_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); }
-// CHECK-LABEL: @xvsigncov_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); }
-// CHECK-LABEL: @xvfmadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfmsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvftintrne_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); }
-// CHECK-LABEL: @xvftintrne_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); }
-// CHECK-LABEL: @xvftintrp_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); }
-// CHECK-LABEL: @xvftintrp_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); }
-// CHECK-LABEL: @xvftintrm_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); }
-// CHECK-LABEL: @xvftintrm_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); }
-// CHECK-LABEL: @xvftint_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); }
-// CHECK-LABEL: @xvffint_s_l(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_l(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); }
-// CHECK-LABEL: @xvftintrz_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrp_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrm_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrne_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); }
-// CHECK-LABEL: @xvftinth_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftinth_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); }
-// CHECK-LABEL: @xvftintl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); }
-// CHECK-LABEL: @xvffinth_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffinth_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); }
-// CHECK-LABEL: @xvffintl_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffintl_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); }
-// CHECK-LABEL: @xvftintrzh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrzh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); }
-// CHECK-LABEL: @xvftintrzl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrzl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); }
-// CHECK-LABEL: @xvftintrph_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrph_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); }
-// CHECK-LABEL: @xvftintrpl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrpl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); }
-// CHECK-LABEL: @xvftintrmh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrmh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); }
-// CHECK-LABEL: @xvftintrml_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrml_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); }
-// CHECK-LABEL: @xvftintrneh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrneh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); }
-// CHECK-LABEL: @xvftintrnel_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrnel_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); }
-// CHECK-LABEL: @xvfrintrne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); }
-// CHECK-LABEL: @xvfrintrne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); }
-// CHECK-LABEL: @xvfrintrz_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrz_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); }
-// CHECK-LABEL: @xvfrintrz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); }
-// CHECK-LABEL: @xvfrintrp_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrp_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); }
-// CHECK-LABEL: @xvfrintrp_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrp_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); }
-// CHECK-LABEL: @xvfrintrm_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrm_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); }
-// CHECK-LABEL: @xvfrintrm_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrm_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); }
-// CHECK-LABEL: @xvld(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvld(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); }
-// CHECK-LABEL: @xvst(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
+// CHECK-LABEL: define dso_local void @xvst(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1)
// CHECK-NEXT: ret void
//
void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); }
-// CHECK-LABEL: @xvstelm_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); }
-// CHECK-LABEL: @xvstelm_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); }
-// CHECK-LABEL: @xvstelm_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); }
-// CHECK-LABEL: @xvstelm_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); }
-// CHECK-LABEL: @xvinsve0_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsve0_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); }
-// CHECK-LABEL: @xvinsve0_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsve0_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); }
-// CHECK-LABEL: @xvpickve_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); }
-// CHECK-LABEL: @xvpickve_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); }
-// CHECK-LABEL: @xvssrlrn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrln_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrln_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrln_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); }
-// CHECK-LABEL: @xvorn_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvorn_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); }
-// CHECK-LABEL: @xvldi(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvldi(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldi() { return __lasx_xvldi(1); }
-// CHECK-LABEL: @xvldx(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]]
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldx(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1), !noalias [[META5:![0-9]+]]
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); }
-// CHECK-LABEL: @xvstx(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1)
+// CHECK-LABEL: define dso_local void @xvstx(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2]], i64 1)
// CHECK-NEXT: ret void
//
void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); }
-// CHECK-LABEL: @xvextl_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextl_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); }
-// CHECK-LABEL: @xvinsgr2vr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsgr2vr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); }
-// CHECK-LABEL: @xvinsgr2vr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsgr2vr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); }
-// CHECK-LABEL: @xvreplve0_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); }
-// CHECK-LABEL: @xvreplve0_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); }
-// CHECK-LABEL: @xvreplve0_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); }
-// CHECK-LABEL: @xvreplve0_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); }
-// CHECK-LABEL: @xvreplve0_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); }
-// CHECK-LABEL: @vext2xv_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); }
-// CHECK-LABEL: @vext2xv_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); }
-// CHECK-LABEL: @vext2xv_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); }
-// CHECK-LABEL: @vext2xv_w_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_w_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); }
-// CHECK-LABEL: @vext2xv_d_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); }
-// CHECK-LABEL: @vext2xv_d_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); }
-// CHECK-LABEL: @vext2xv_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); }
-// CHECK-LABEL: @vext2xv_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); }
-// CHECK-LABEL: @vext2xv_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); }
-// CHECK-LABEL: @vext2xv_wu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_wu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); }
-// CHECK-LABEL: @vext2xv_du_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); }
-// CHECK-LABEL: @vext2xv_du_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); }
-// CHECK-LABEL: @xvpermi_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); }
-// CHECK-LABEL: @xvpermi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); }
-// CHECK-LABEL: @xvperm_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvperm_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); }
-// CHECK-LABEL: @xvldrepl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); }
-// CHECK-LABEL: @xvldrepl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2)
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); }
-// CHECK-LABEL: @xvldrepl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4)
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); }
-// CHECK-LABEL: @xvldrepl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 8)
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); }
-// CHECK-LABEL: @xvpickve2gr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local i64 @xvpickve2gr_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local i64 @xvpickve2gr_du(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); }
-// CHECK-LABEL: @xvaddwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvaddwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvsubwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvsubwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvsubwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvsubwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvsubwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvsubwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvsubwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvsubwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvsubwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvsubwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvhaddw_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); }
-// CHECK-LABEL: @xvhaddw_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); }
-// CHECK-LABEL: @xvhsubw_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); }
-// CHECK-LABEL: @xvhsubw_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); }
-// CHECK-LABEL: @xvmaddwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
-// CHECK-LABEL: @xvrotr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); }
-// CHECK-LABEL: @xvrotr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); }
-// CHECK-LABEL: @xvrotr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); }
-// CHECK-LABEL: @xvrotr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); }
-// CHECK-LABEL: @xvadd_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); }
-// CHECK-LABEL: @xvsub_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); }
-// CHECK-LABEL: @xvaddwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmskgez_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskgez_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); }
-// CHECK-LABEL: @xvmsknz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsknz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); }
-// CHECK-LABEL: @xvexth_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); }
-// CHECK-LABEL: @xvexth_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); }
-// CHECK-LABEL: @xvexth_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); }
-// CHECK-LABEL: @xvexth_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); }
-// CHECK-LABEL: @xvexth_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); }
-// CHECK-LABEL: @xvexth_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); }
-// CHECK-LABEL: @xvexth_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); }
-// CHECK-LABEL: @xvexth_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); }
-// CHECK-LABEL: @xvrotri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); }
-// CHECK-LABEL: @xvrotri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); }
-// CHECK-LABEL: @xvrotri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); }
-// CHECK-LABEL: @xvrotri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); }
-// CHECK-LABEL: @xvextl_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextl_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); }
-// CHECK-LABEL: @xvsrlni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xbnz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); }
-// CHECK-LABEL: @xbnz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); }
-// CHECK-LABEL: @xbnz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); }
-// CHECK-LABEL: @xbnz_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_v(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); }
-// CHECK-LABEL: @xbnz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); }
-// CHECK-LABEL: @xbz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); }
-// CHECK-LABEL: @xbz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); }
-// CHECK-LABEL: @xbz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); }
-// CHECK-LABEL: @xbz_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_v(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); }
-// CHECK-LABEL: @xbz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); }
-// CHECK-LABEL: @xvfcmp_caf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_caf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_caf_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_caf_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_ceq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_ceq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_ceq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_ceq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cle_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cle_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_clt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_clt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_clt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_clt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cor_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cor_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cor_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cor_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cueq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cueq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cueq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cueq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cule_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cule_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cule_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cule_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cult_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cult_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cult_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cult_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cun_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cun_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cune_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cune_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cune_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cune_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cun_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cun_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_saf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_saf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_saf_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_saf_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_seq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_seq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_seq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_seq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sle_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sle_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_slt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_slt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_slt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_slt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sor_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sor_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sor_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sor_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sueq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sueq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sueq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sueq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sule_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sule_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sule_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sule_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sult_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sult_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sult_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sult_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sun_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sun_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sune_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sune_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sune_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sune_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sun_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sun_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); }
-// CHECK-LABEL: @xvpickve_d_f(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_d_f(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); }
-// CHECK-LABEL: @xvpickve_w_f(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_w_f(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); }
-// CHECK-LABEL: @xvrepli_b(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); }
-// CHECK-LABEL: @xvrepli_d(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); }
-// CHECK-LABEL: @xvrepli_h(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
-// CHECK-LABEL: @xvrepli_w(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META5]] = !{[[META6:![0-9]+]]}
+// CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]], !"__lasx_xvldx: %agg.result"}
+// CHECK: [[META7]] = distinct !{[[META7]], !"__lasx_xvldx"}
+//.
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c
index b79f939..b194ea8 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c
@@ -1,37 +1,46 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s
#include <lasxintrin.h>
-// CHECK-LABEL: @xvfrecipe_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrecipe_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); }
-// CHECK-LABEL: @xvfrecipe_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrecipe_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); }
-// CHECK-LABEL: @xvfrsqrte_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrsqrte_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); }
-// CHECK-LABEL: @xvfrsqrte_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrsqrte_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); }
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c
index 63e9ba6..9d543df 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c
@@ -1,38 +1,47 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s
typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
-// CHECK-LABEL: @xvfrecipe_s
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrecipe_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); }
-// CHECK-LABEL: @xvfrecipe_d
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrecipe_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); }
-// CHECK-LABEL: @xvfrsqrte_s
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrsqrte_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); }
-// CHECK-LABEL: @xvfrsqrte_d
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @xvfrsqrte_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); }
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c
index f52a23a..9b21c7e 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -25,6384 +25,7125 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
-// CHECK-LABEL: @xvsll_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); }
-// CHECK-LABEL: @xvsll_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); }
-// CHECK-LABEL: @xvsll_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); }
-// CHECK-LABEL: @xvsll_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsll_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); }
-// CHECK-LABEL: @xvslli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); }
-// CHECK-LABEL: @xvslli_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); }
-// CHECK-LABEL: @xvslli_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); }
-// CHECK-LABEL: @xvslli_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); }
-// CHECK-LABEL: @xvsra_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); }
-// CHECK-LABEL: @xvsra_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); }
-// CHECK-LABEL: @xvsra_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); }
-// CHECK-LABEL: @xvsra_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsra_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); }
-// CHECK-LABEL: @xvsrai_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); }
-// CHECK-LABEL: @xvsrai_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); }
-// CHECK-LABEL: @xvsrai_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); }
-// CHECK-LABEL: @xvsrai_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrai_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); }
-// CHECK-LABEL: @xvsrar_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); }
-// CHECK-LABEL: @xvsrar_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); }
-// CHECK-LABEL: @xvsrar_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); }
-// CHECK-LABEL: @xvsrar_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrar_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); }
-// CHECK-LABEL: @xvsrari_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); }
-// CHECK-LABEL: @xvsrari_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); }
-// CHECK-LABEL: @xvsrari_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); }
-// CHECK-LABEL: @xvsrari_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrari_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); }
-// CHECK-LABEL: @xvsrl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); }
-// CHECK-LABEL: @xvsrl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); }
-// CHECK-LABEL: @xvsrl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); }
-// CHECK-LABEL: @xvsrl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); }
-// CHECK-LABEL: @xvsrli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); }
-// CHECK-LABEL: @xvsrli_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); }
-// CHECK-LABEL: @xvsrli_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); }
-// CHECK-LABEL: @xvsrli_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); }
-// CHECK-LABEL: @xvsrlr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); }
-// CHECK-LABEL: @xvsrlr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); }
-// CHECK-LABEL: @xvsrlr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); }
-// CHECK-LABEL: @xvsrlr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); }
-// CHECK-LABEL: @xvsrlri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); }
-// CHECK-LABEL: @xvsrlri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); }
-// CHECK-LABEL: @xvsrlri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); }
-// CHECK-LABEL: @xvsrlri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); }
-// CHECK-LABEL: @xvbitclr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); }
-// CHECK-LABEL: @xvbitclr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); }
-// CHECK-LABEL: @xvbitclr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); }
-// CHECK-LABEL: @xvbitclr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); }
-// CHECK-LABEL: @xvbitclri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); }
-// CHECK-LABEL: @xvbitclri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); }
-// CHECK-LABEL: @xvbitclri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); }
-// CHECK-LABEL: @xvbitclri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitclri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); }
-// CHECK-LABEL: @xvbitset_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); }
-// CHECK-LABEL: @xvbitset_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); }
-// CHECK-LABEL: @xvbitset_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); }
-// CHECK-LABEL: @xvbitset_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitset_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); }
-// CHECK-LABEL: @xvbitseti_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); }
-// CHECK-LABEL: @xvbitseti_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); }
-// CHECK-LABEL: @xvbitseti_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); }
-// CHECK-LABEL: @xvbitseti_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseti_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); }
-// CHECK-LABEL: @xvbitrev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); }
-// CHECK-LABEL: @xvbitrev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); }
-// CHECK-LABEL: @xvbitrev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); }
-// CHECK-LABEL: @xvbitrev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); }
-// CHECK-LABEL: @xvbitrevi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); }
-// CHECK-LABEL: @xvbitrevi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitrevi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); }
-// CHECK-LABEL: @xvadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); }
-// CHECK-LABEL: @xvadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); }
-// CHECK-LABEL: @xvadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); }
-// CHECK-LABEL: @xvadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); }
-// CHECK-LABEL: @xvaddi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); }
-// CHECK-LABEL: @xvaddi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); }
-// CHECK-LABEL: @xvaddi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); }
-// CHECK-LABEL: @xvaddi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); }
-// CHECK-LABEL: @xvsub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); }
-// CHECK-LABEL: @xvsub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); }
-// CHECK-LABEL: @xvsub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); }
-// CHECK-LABEL: @xvsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); }
-// CHECK-LABEL: @xvsubi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); }
-// CHECK-LABEL: @xvsubi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); }
-// CHECK-LABEL: @xvsubi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); }
-// CHECK-LABEL: @xvsubi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); }
-// CHECK-LABEL: @xvmax_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); }
-// CHECK-LABEL: @xvmax_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); }
-// CHECK-LABEL: @xvmax_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); }
-// CHECK-LABEL: @xvmax_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); }
-// CHECK-LABEL: @xvmaxi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); }
-// CHECK-LABEL: @xvmaxi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); }
-// CHECK-LABEL: @xvmaxi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); }
-// CHECK-LABEL: @xvmaxi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); }
-// CHECK-LABEL: @xvmax_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); }
-// CHECK-LABEL: @xvmax_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); }
-// CHECK-LABEL: @xvmax_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); }
-// CHECK-LABEL: @xvmax_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmax_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); }
-// CHECK-LABEL: @xvmaxi_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); }
-// CHECK-LABEL: @xvmaxi_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaxi_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); }
-// CHECK-LABEL: @xvmin_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); }
-// CHECK-LABEL: @xvmin_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); }
-// CHECK-LABEL: @xvmin_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); }
-// CHECK-LABEL: @xvmin_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); }
-// CHECK-LABEL: @xvmini_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); }
-// CHECK-LABEL: @xvmini_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); }
-// CHECK-LABEL: @xvmini_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); }
-// CHECK-LABEL: @xvmini_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); }
-// CHECK-LABEL: @xvmin_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); }
-// CHECK-LABEL: @xvmin_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); }
-// CHECK-LABEL: @xvmin_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); }
-// CHECK-LABEL: @xvmin_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmin_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); }
-// CHECK-LABEL: @xvmini_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); }
-// CHECK-LABEL: @xvmini_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); }
-// CHECK-LABEL: @xvmini_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); }
-// CHECK-LABEL: @xvmini_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmini_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); }
-// CHECK-LABEL: @xvseq_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); }
-// CHECK-LABEL: @xvseq_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); }
-// CHECK-LABEL: @xvseq_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); }
-// CHECK-LABEL: @xvseq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); }
-// CHECK-LABEL: @xvseqi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); }
-// CHECK-LABEL: @xvseqi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); }
-// CHECK-LABEL: @xvseqi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); }
-// CHECK-LABEL: @xvseqi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvseqi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); }
-// CHECK-LABEL: @xvslt_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); }
-// CHECK-LABEL: @xvslt_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); }
-// CHECK-LABEL: @xvslt_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); }
-// CHECK-LABEL: @xvslt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); }
-// CHECK-LABEL: @xvslti_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); }
-// CHECK-LABEL: @xvslti_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); }
-// CHECK-LABEL: @xvslti_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); }
-// CHECK-LABEL: @xvslti_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); }
-// CHECK-LABEL: @xvslt_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); }
-// CHECK-LABEL: @xvslt_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); }
-// CHECK-LABEL: @xvslt_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); }
-// CHECK-LABEL: @xvslt_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslt_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); }
-// CHECK-LABEL: @xvslti_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); }
-// CHECK-LABEL: @xvslti_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); }
-// CHECK-LABEL: @xvslti_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); }
-// CHECK-LABEL: @xvslti_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslti_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); }
-// CHECK-LABEL: @xvsle_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); }
-// CHECK-LABEL: @xvsle_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); }
-// CHECK-LABEL: @xvsle_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); }
-// CHECK-LABEL: @xvsle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); }
-// CHECK-LABEL: @xvslei_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); }
-// CHECK-LABEL: @xvslei_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); }
-// CHECK-LABEL: @xvslei_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); }
-// CHECK-LABEL: @xvslei_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); }
-// CHECK-LABEL: @xvsle_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); }
-// CHECK-LABEL: @xvsle_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); }
-// CHECK-LABEL: @xvsle_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); }
-// CHECK-LABEL: @xvsle_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsle_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); }
-// CHECK-LABEL: @xvslei_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); }
-// CHECK-LABEL: @xvslei_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); }
-// CHECK-LABEL: @xvslei_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); }
-// CHECK-LABEL: @xvslei_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvslei_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); }
-// CHECK-LABEL: @xvsat_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); }
-// CHECK-LABEL: @xvsat_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); }
-// CHECK-LABEL: @xvsat_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); }
-// CHECK-LABEL: @xvsat_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); }
-// CHECK-LABEL: @xvsat_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); }
-// CHECK-LABEL: @xvsat_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); }
-// CHECK-LABEL: @xvsat_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); }
-// CHECK-LABEL: @xvsat_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsat_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); }
-// CHECK-LABEL: @xvadda_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); }
-// CHECK-LABEL: @xvadda_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); }
-// CHECK-LABEL: @xvadda_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); }
-// CHECK-LABEL: @xvadda_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadda_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); }
-// CHECK-LABEL: @xvsadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); }
-// CHECK-LABEL: @xvsadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); }
-// CHECK-LABEL: @xvsadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); }
-// CHECK-LABEL: @xvsadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); }
-// CHECK-LABEL: @xvsadd_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); }
-// CHECK-LABEL: @xvsadd_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); }
-// CHECK-LABEL: @xvsadd_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); }
-// CHECK-LABEL: @xvsadd_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsadd_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); }
-// CHECK-LABEL: @xvavg_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); }
-// CHECK-LABEL: @xvavg_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); }
-// CHECK-LABEL: @xvavg_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); }
-// CHECK-LABEL: @xvavg_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); }
-// CHECK-LABEL: @xvavg_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); }
-// CHECK-LABEL: @xvavg_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); }
-// CHECK-LABEL: @xvavg_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); }
-// CHECK-LABEL: @xvavg_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavg_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); }
-// CHECK-LABEL: @xvavgr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); }
-// CHECK-LABEL: @xvavgr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); }
-// CHECK-LABEL: @xvavgr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); }
-// CHECK-LABEL: @xvavgr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); }
-// CHECK-LABEL: @xvavgr_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); }
-// CHECK-LABEL: @xvavgr_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); }
-// CHECK-LABEL: @xvavgr_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); }
-// CHECK-LABEL: @xvavgr_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvavgr_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); }
-// CHECK-LABEL: @xvssub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); }
-// CHECK-LABEL: @xvssub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); }
-// CHECK-LABEL: @xvssub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); }
-// CHECK-LABEL: @xvssub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); }
-// CHECK-LABEL: @xvssub_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); }
-// CHECK-LABEL: @xvssub_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); }
-// CHECK-LABEL: @xvssub_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); }
-// CHECK-LABEL: @xvssub_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssub_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); }
-// CHECK-LABEL: @xvabsd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); }
-// CHECK-LABEL: @xvabsd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); }
-// CHECK-LABEL: @xvabsd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); }
-// CHECK-LABEL: @xvabsd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); }
-// CHECK-LABEL: @xvabsd_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); }
-// CHECK-LABEL: @xvabsd_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); }
-// CHECK-LABEL: @xvabsd_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); }
-// CHECK-LABEL: @xvabsd_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvabsd_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); }
-// CHECK-LABEL: @xvmul_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); }
-// CHECK-LABEL: @xvmul_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); }
-// CHECK-LABEL: @xvmul_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); }
-// CHECK-LABEL: @xvmul_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmul_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); }
-// CHECK-LABEL: @xvmadd_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvdiv_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); }
-// CHECK-LABEL: @xvdiv_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); }
-// CHECK-LABEL: @xvdiv_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); }
-// CHECK-LABEL: @xvdiv_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); }
-// CHECK-LABEL: @xvdiv_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); }
-// CHECK-LABEL: @xvdiv_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); }
-// CHECK-LABEL: @xvdiv_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); }
-// CHECK-LABEL: @xvdiv_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvdiv_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); }
-// CHECK-LABEL: @xvhaddw_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); }
-// CHECK-LABEL: @xvhaddw_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); }
-// CHECK-LABEL: @xvhaddw_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); }
-// CHECK-LABEL: @xvhaddw_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); }
-// CHECK-LABEL: @xvhaddw_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); }
-// CHECK-LABEL: @xvhaddw_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); }
-// CHECK-LABEL: @xvhsubw_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); }
-// CHECK-LABEL: @xvhsubw_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); }
-// CHECK-LABEL: @xvhsubw_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); }
-// CHECK-LABEL: @xvhsubw_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); }
-// CHECK-LABEL: @xvmod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); }
-// CHECK-LABEL: @xvmod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); }
-// CHECK-LABEL: @xvmod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); }
-// CHECK-LABEL: @xvmod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
-// CHECK-LABEL: @xvmod_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
-// CHECK-LABEL: @xvmod_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
-// CHECK-LABEL: @xvmod_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
-// CHECK-LABEL: @xvmod_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmod_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
-// CHECK-LABEL: @xvrepl128vei_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
-// CHECK-LABEL: @xvrepl128vei_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrepl128vei_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
-// CHECK-LABEL: @xvpickev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); }
-// CHECK-LABEL: @xvpickev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); }
-// CHECK-LABEL: @xvpickev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); }
-// CHECK-LABEL: @xvpickev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); }
-// CHECK-LABEL: @xvpickod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); }
-// CHECK-LABEL: @xvpickod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); }
-// CHECK-LABEL: @xvpickod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); }
-// CHECK-LABEL: @xvpickod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); }
-// CHECK-LABEL: @xvilvh_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); }
-// CHECK-LABEL: @xvilvh_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); }
-// CHECK-LABEL: @xvilvh_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); }
-// CHECK-LABEL: @xvilvh_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvh_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); }
-// CHECK-LABEL: @xvilvl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); }
-// CHECK-LABEL: @xvilvl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); }
-// CHECK-LABEL: @xvilvl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); }
-// CHECK-LABEL: @xvilvl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvilvl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); }
-// CHECK-LABEL: @xvpackev_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); }
-// CHECK-LABEL: @xvpackev_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); }
-// CHECK-LABEL: @xvpackev_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); }
-// CHECK-LABEL: @xvpackev_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackev_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); }
-// CHECK-LABEL: @xvpackod_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); }
-// CHECK-LABEL: @xvpackod_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); }
-// CHECK-LABEL: @xvpackod_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); }
-// CHECK-LABEL: @xvpackod_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpackod_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); }
-// CHECK-LABEL: @xvshuf_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); }
-// CHECK-LABEL: @xvand_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvand_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); }
-// CHECK-LABEL: @xvandi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvandi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); }
-// CHECK-LABEL: @xvor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); }
-// CHECK-LABEL: @xvori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); }
-// CHECK-LABEL: @xvnor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvnor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); }
-// CHECK-LABEL: @xvnori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvnori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); }
-// CHECK-LABEL: @xvxor_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvxor_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); }
-// CHECK-LABEL: @xvxori_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvxori_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); }
-// CHECK-LABEL: @xvbitsel_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitsel_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); }
-// CHECK-LABEL: @xvbitseli_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbitseli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); }
-// CHECK-LABEL: @xvshuf4i_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); }
-// CHECK-LABEL: @xvshuf4i_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); }
-// CHECK-LABEL: @xvshuf4i_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); }
-// CHECK-LABEL: @xvreplgr2vr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); }
-// CHECK-LABEL: @xvreplgr2vr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); }
-// CHECK-LABEL: @xvreplgr2vr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); }
-// CHECK-LABEL: @xvreplgr2vr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-LABEL: define dso_local void @xvreplgr2vr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); }
-// CHECK-LABEL: @xvpcnt_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); }
-// CHECK-LABEL: @xvpcnt_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); }
-// CHECK-LABEL: @xvpcnt_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); }
-// CHECK-LABEL: @xvpcnt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpcnt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); }
-// CHECK-LABEL: @xvclo_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); }
-// CHECK-LABEL: @xvclo_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); }
-// CHECK-LABEL: @xvclo_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); }
-// CHECK-LABEL: @xvclo_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclo_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); }
-// CHECK-LABEL: @xvclz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); }
-// CHECK-LABEL: @xvclz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); }
-// CHECK-LABEL: @xvclz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); }
-// CHECK-LABEL: @xvclz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvclz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); }
-// CHECK-LABEL: @xvfadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); }
-// CHECK-LABEL: @xvfadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); }
-// CHECK-LABEL: @xvfsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); }
-// CHECK-LABEL: @xvfsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); }
-// CHECK-LABEL: @xvfmul_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmul_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); }
-// CHECK-LABEL: @xvfmul_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmul_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); }
-// CHECK-LABEL: @xvfdiv_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfdiv_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); }
-// CHECK-LABEL: @xvfdiv_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfdiv_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); }
-// CHECK-LABEL: @xvfcvt_h_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvt_h_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); }
-// CHECK-LABEL: @xvfcvt_s_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvt_s_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); }
-// CHECK-LABEL: @xvfmin_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmin_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); }
-// CHECK-LABEL: @xvfmin_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmin_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); }
-// CHECK-LABEL: @xvfmina_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmina_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); }
-// CHECK-LABEL: @xvfmina_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmina_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); }
-// CHECK-LABEL: @xvfmax_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmax_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); }
-// CHECK-LABEL: @xvfmax_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmax_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); }
-// CHECK-LABEL: @xvfmaxa_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmaxa_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); }
-// CHECK-LABEL: @xvfmaxa_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmaxa_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); }
-// CHECK-LABEL: @xvfclass_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfclass_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); }
-// CHECK-LABEL: @xvfclass_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfclass_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); }
-// CHECK-LABEL: @xvfsqrt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsqrt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); }
-// CHECK-LABEL: @xvfsqrt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfsqrt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); }
-// CHECK-LABEL: @xvfrecip_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrecip_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); }
-// CHECK-LABEL: @xvfrecip_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrecip_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); }
-// CHECK-LABEL: @xvfrint_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrint_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); }
-// CHECK-LABEL: @xvfrint_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrint_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); }
-// CHECK-LABEL: @xvfrsqrt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrsqrt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); }
-// CHECK-LABEL: @xvfrsqrt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrsqrt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); }
-// CHECK-LABEL: @xvflogb_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvflogb_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); }
-// CHECK-LABEL: @xvflogb_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvflogb_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); }
-// CHECK-LABEL: @xvfcvth_s_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvth_s_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); }
-// CHECK-LABEL: @xvfcvth_d_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvth_d_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); }
-// CHECK-LABEL: @xvfcvtl_s_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvtl_s_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); }
-// CHECK-LABEL: @xvfcvtl_d_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcvtl_d_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); }
-// CHECK-LABEL: @xvftint_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); }
-// CHECK-LABEL: @xvftint_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); }
-// CHECK-LABEL: @xvftint_wu_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_wu_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); }
-// CHECK-LABEL: @xvftint_lu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_lu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); }
-// CHECK-LABEL: @xvftintrz_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); }
-// CHECK-LABEL: @xvftintrz_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); }
-// CHECK-LABEL: @xvftintrz_wu_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_wu_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); }
-// CHECK-LABEL: @xvftintrz_lu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_lu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); }
-// CHECK-LABEL: @xvffint_s_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); }
-// CHECK-LABEL: @xvffint_d_l(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_d_l(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); }
-// CHECK-LABEL: @xvffint_s_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); }
-// CHECK-LABEL: @xvffint_d_lu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_d_lu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); }
-// CHECK-LABEL: @xvreplve_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); }
-// CHECK-LABEL: @xvreplve_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); }
-// CHECK-LABEL: @xvreplve_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); }
-// CHECK-LABEL: @xvreplve_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); }
-// CHECK-LABEL: @xvpermi_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); }
-// CHECK-LABEL: @xvandn_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvandn_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); }
-// CHECK-LABEL: @xvneg_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); }
-// CHECK-LABEL: @xvneg_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); }
-// CHECK-LABEL: @xvneg_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); }
-// CHECK-LABEL: @xvneg_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvneg_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); }
-// CHECK-LABEL: @xvmuh_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); }
-// CHECK-LABEL: @xvmuh_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); }
-// CHECK-LABEL: @xvmuh_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); }
-// CHECK-LABEL: @xvmuh_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); }
-// CHECK-LABEL: @xvmuh_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); }
-// CHECK-LABEL: @xvmuh_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); }
-// CHECK-LABEL: @xvmuh_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); }
-// CHECK-LABEL: @xvmuh_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmuh_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); }
-// CHECK-LABEL: @xvsllwil_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); }
-// CHECK-LABEL: @xvsllwil_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); }
-// CHECK-LABEL: @xvsllwil_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); }
-// CHECK-LABEL: @xvsllwil_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); }
-// CHECK-LABEL: @xvsllwil_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); }
-// CHECK-LABEL: @xvsllwil_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsllwil_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); }
-// CHECK-LABEL: @xvsran_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); }
-// CHECK-LABEL: @xvsran_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); }
-// CHECK-LABEL: @xvsran_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsran_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); }
-// CHECK-LABEL: @xvssran_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); }
-// CHECK-LABEL: @xvssran_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); }
-// CHECK-LABEL: @xvssran_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); }
-// CHECK-LABEL: @xvssran_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssran_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssran_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssran_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrarn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrarn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrarn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrarn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrarn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrarn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrarn_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrarn_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrarn_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarn_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrln_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrln_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrln_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrln_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrln_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrln_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrln_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); }
-// CHECK-LABEL: @xvsrlrn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); }
-// CHECK-LABEL: @xvfrstpi_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstpi_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); }
-// CHECK-LABEL: @xvfrstpi_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstpi_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); }
-// CHECK-LABEL: @xvfrstp_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstp_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); }
-// CHECK-LABEL: @xvfrstp_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrstp_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); }
-// CHECK-LABEL: @xvshuf4i_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvshuf4i_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); }
-// CHECK-LABEL: @xvbsrl_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbsrl_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); }
-// CHECK-LABEL: @xvbsll_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvbsll_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); }
-// CHECK-LABEL: @xvextrins_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); }
-// CHECK-LABEL: @xvextrins_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextrins_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); }
-// CHECK-LABEL: @xvmskltz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); }
-// CHECK-LABEL: @xvmskltz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); }
-// CHECK-LABEL: @xvmskltz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); }
-// CHECK-LABEL: @xvmskltz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskltz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); }
-// CHECK-LABEL: @xvsigncov_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); }
-// CHECK-LABEL: @xvsigncov_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); }
-// CHECK-LABEL: @xvsigncov_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); }
-// CHECK-LABEL: @xvsigncov_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsigncov_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); }
-// CHECK-LABEL: @xvfmadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfmsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmadd_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmadd_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmadd_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmadd_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmsub_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmsub_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); }
-// CHECK-LABEL: @xvfnmsub_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfnmsub_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); }
-// CHECK-LABEL: @xvftintrne_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); }
-// CHECK-LABEL: @xvftintrne_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); }
-// CHECK-LABEL: @xvftintrp_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); }
-// CHECK-LABEL: @xvftintrp_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); }
-// CHECK-LABEL: @xvftintrm_w_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_w_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); }
-// CHECK-LABEL: @xvftintrm_l_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_l_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); }
-// CHECK-LABEL: @xvftint_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftint_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); }
-// CHECK-LABEL: @xvffint_s_l(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffint_s_l(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); }
-// CHECK-LABEL: @xvftintrz_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrz_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrp_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrp_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrm_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrm_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); }
-// CHECK-LABEL: @xvftintrne_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrne_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); }
-// CHECK-LABEL: @xvftinth_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftinth_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); }
-// CHECK-LABEL: @xvftintl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); }
-// CHECK-LABEL: @xvffinth_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffinth_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); }
-// CHECK-LABEL: @xvffintl_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvffintl_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); }
-// CHECK-LABEL: @xvftintrzh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrzh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); }
-// CHECK-LABEL: @xvftintrzl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrzl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); }
-// CHECK-LABEL: @xvftintrph_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrph_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); }
-// CHECK-LABEL: @xvftintrpl_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrpl_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); }
-// CHECK-LABEL: @xvftintrmh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrmh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); }
-// CHECK-LABEL: @xvftintrml_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrml_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); }
-// CHECK-LABEL: @xvftintrneh_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrneh_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); }
-// CHECK-LABEL: @xvftintrnel_l_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvftintrnel_l_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); }
-// CHECK-LABEL: @xvfrintrne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); }
-// CHECK-LABEL: @xvfrintrne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); }
-// CHECK-LABEL: @xvfrintrz_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrz_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); }
-// CHECK-LABEL: @xvfrintrz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrz_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); }
-// CHECK-LABEL: @xvfrintrp_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrp_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); }
-// CHECK-LABEL: @xvfrintrp_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrp_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); }
-// CHECK-LABEL: @xvfrintrm_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrm_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); }
-// CHECK-LABEL: @xvfrintrm_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfrintrm_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); }
-// CHECK-LABEL: @xvld(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvld(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); }
-// CHECK-LABEL: @xvst(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
+// CHECK-LABEL: define dso_local void @xvst(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1)
// CHECK-NEXT: ret void
//
void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); }
-// CHECK-LABEL: @xvstelm_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); }
-// CHECK-LABEL: @xvstelm_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); }
-// CHECK-LABEL: @xvstelm_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); }
-// CHECK-LABEL: @xvstelm_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-LABEL: define dso_local void @xvstelm_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); }
-// CHECK-LABEL: @xvinsve0_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsve0_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); }
-// CHECK-LABEL: @xvinsve0_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsve0_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); }
-// CHECK-LABEL: @xvpickve_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); }
-// CHECK-LABEL: @xvpickve_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); }
-// CHECK-LABEL: @xvssrlrn_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrlrn_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrn_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); }
-// CHECK-LABEL: @xvssrln_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); }
-// CHECK-LABEL: @xvssrln_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); }
-// CHECK-LABEL: @xvssrln_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrln_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); }
-// CHECK-LABEL: @xvorn_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvorn_v(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); }
-// CHECK-LABEL: @xvldi(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvldi(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldi() { return __builtin_lasx_xvldi(1); }
-// CHECK-LABEL: @xvldx(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldx(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); }
-// CHECK-LABEL: @xvstx(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1)
+// CHECK-LABEL: define dso_local void @xvstx(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2]], i64 1)
// CHECK-NEXT: ret void
//
void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); }
-// CHECK-LABEL: @xvextl_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextl_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); }
-// CHECK-LABEL: @xvinsgr2vr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsgr2vr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); }
-// CHECK-LABEL: @xvinsgr2vr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvinsgr2vr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); }
-// CHECK-LABEL: @xvreplve0_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); }
-// CHECK-LABEL: @xvreplve0_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); }
-// CHECK-LABEL: @xvreplve0_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); }
-// CHECK-LABEL: @xvreplve0_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); }
-// CHECK-LABEL: @xvreplve0_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvreplve0_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); }
-// CHECK-LABEL: @vext2xv_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); }
-// CHECK-LABEL: @vext2xv_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); }
-// CHECK-LABEL: @vext2xv_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); }
-// CHECK-LABEL: @vext2xv_w_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_w_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); }
-// CHECK-LABEL: @vext2xv_d_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); }
-// CHECK-LABEL: @vext2xv_d_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_d_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); }
-// CHECK-LABEL: @vext2xv_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); }
-// CHECK-LABEL: @vext2xv_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); }
-// CHECK-LABEL: @vext2xv_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); }
-// CHECK-LABEL: @vext2xv_wu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_wu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); }
-// CHECK-LABEL: @vext2xv_du_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); }
-// CHECK-LABEL: @vext2xv_du_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @vext2xv_du_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); }
-// CHECK-LABEL: @xvpermi_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); }
-// CHECK-LABEL: @xvpermi_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpermi_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); }
-// CHECK-LABEL: @xvperm_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvperm_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); }
-// CHECK-LABEL: @xvldrepl_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); }
-// CHECK-LABEL: @xvldrepl_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2)
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); }
-// CHECK-LABEL: @xvldrepl_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4)
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); }
-// CHECK-LABEL: @xvldrepl_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvldrepl_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 8)
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); }
-// CHECK-LABEL: @xvpickve2gr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local i64 @xvpickve2gr_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); }
-// CHECK-LABEL: @xvpickve2gr_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local i64 @xvpickve2gr_du(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); }
-// CHECK-LABEL: @xvaddwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvaddwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvsubwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvsubwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvsubwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvsubwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvsubwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvsubwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvsubwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvsubwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvsubwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvsubwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvsubwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvsubwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsubwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); }
-// CHECK-LABEL: @xvaddwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvaddwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvaddwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvmulwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvmulwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvmulwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvaddwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvaddwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvaddwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvmulwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); }
-// CHECK-LABEL: @xvmulwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); }
-// CHECK-LABEL: @xvmulwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); }
-// CHECK-LABEL: @xvhaddw_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); }
-// CHECK-LABEL: @xvhaddw_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhaddw_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); }
-// CHECK-LABEL: @xvhsubw_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); }
-// CHECK-LABEL: @xvhsubw_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvhsubw_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); }
-// CHECK-LABEL: @xvmaddwev_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwev_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_d_wu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_w_hu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
-// CHECK-LABEL: @xvmaddwod_h_bu_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
-// CHECK-LABEL: @xvrotr_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); }
-// CHECK-LABEL: @xvrotr_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); }
-// CHECK-LABEL: @xvrotr_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); }
-// CHECK-LABEL: @xvrotr_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotr_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); }
-// CHECK-LABEL: @xvadd_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvadd_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); }
-// CHECK-LABEL: @xvsub_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsub_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); }
-// CHECK-LABEL: @xvaddwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvaddwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvaddwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmulwev_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwev_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmulwod_q_du_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmulwod_q_du_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); }
-// CHECK-LABEL: @xvmskgez_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmskgez_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); }
-// CHECK-LABEL: @xvmsknz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvmsknz_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); }
-// CHECK-LABEL: @xvexth_h_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_h_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); }
-// CHECK-LABEL: @xvexth_w_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_w_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); }
-// CHECK-LABEL: @xvexth_d_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_d_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); }
-// CHECK-LABEL: @xvexth_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); }
-// CHECK-LABEL: @xvexth_hu_bu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_hu_bu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]])
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); }
-// CHECK-LABEL: @xvexth_wu_hu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_wu_hu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]])
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); }
-// CHECK-LABEL: @xvexth_du_wu(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_du_wu(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); }
-// CHECK-LABEL: @xvexth_qu_du(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvexth_qu_du(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); }
-// CHECK-LABEL: @xvrotri_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); }
-// CHECK-LABEL: @xvrotri_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); }
-// CHECK-LABEL: @xvrotri_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); }
-// CHECK-LABEL: @xvrotri_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvrotri_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); }
-// CHECK-LABEL: @xvextl_q_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvextl_q_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); }
-// CHECK-LABEL: @xvsrlni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrlrni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrlrni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrlrni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrlrni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrani_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrani_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvsrarni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvsrarni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrani_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrani_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_b_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_b_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_h_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_h_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_w_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_w_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_d_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_d_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_bu_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_bu_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_hu_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_hu_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_wu_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_wu_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); }
-// CHECK-LABEL: @xvssrarni_du_q(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvssrarni_du_q(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); }
-// CHECK-LABEL: @xbnz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); }
-// CHECK-LABEL: @xbnz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); }
-// CHECK-LABEL: @xbnz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); }
-// CHECK-LABEL: @xbnz_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_v(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); }
-// CHECK-LABEL: @xbnz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbnz_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); }
-// CHECK-LABEL: @xbz_b(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_b(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); }
-// CHECK-LABEL: @xbz_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_d(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); }
-// CHECK-LABEL: @xbz_h(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_h(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); }
-// CHECK-LABEL: @xbz_v(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_v(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); }
-// CHECK-LABEL: @xbz_w(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local signext i32 @xbz_w(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); }
-// CHECK-LABEL: @xvfcmp_caf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_caf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_caf_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_caf_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_ceq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_ceq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_ceq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_ceq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cle_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cle_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_clt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_clt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_clt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_clt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cor_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cor_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cor_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cor_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cueq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cueq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cueq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cueq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cule_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cule_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cule_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cule_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cult_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cult_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cult_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cult_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cun_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cun_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cune_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cune_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cune_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cune_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_cun_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_cun_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_saf_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_saf_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_saf_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_saf_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_seq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_seq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_seq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_seq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sle_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sle_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sle_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sle_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_slt_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_slt_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_slt_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_slt_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sne_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sne_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sne_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sne_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sor_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sor_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sor_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sor_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sueq_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sueq_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sueq_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sueq_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sule_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sule_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sule_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sule_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sult_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sult_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sult_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sult_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sun_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sun_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sune_d(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sune_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]])
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sune_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sune_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); }
-// CHECK-LABEL: @xvfcmp_sun_s(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvfcmp_sun_s(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]])
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); }
-// CHECK-LABEL: @xvpickve_d_f(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_d_f(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1)
-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); }
-// CHECK-LABEL: @xvpickve_w_f(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @xvpickve_w_f(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1)
-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); }
-// CHECK-LABEL: @xvrepli_b(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_b(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); }
-// CHECK-LABEL: @xvrepli_d(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_d(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); }
-// CHECK-LABEL: @xvrepli_h(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_h(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); }
-// CHECK-LABEL: @xvrepli_w(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @xvrepli_w(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); }
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
index cdbfdd6..59b71cd 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
@@ -6,20 +6,23 @@
// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT
-// CHECK-LE-LABEL: @test1(
-// CHECK-LE-NEXT: entry:
-// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
-// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LE-LABEL: define dso_local void @test1(
+// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-LE-NEXT: [[ENTRY:.*:]]
+// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4]], <16 x i8> [[VC3]], <16 x i8> [[VC2]], <16 x i8> [[VC1]])
+// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]]
// CHECK-LE-NEXT: ret void
//
-// CHECK-BE-LABEL: @test1(
-// CHECK-BE-NEXT: entry:
-// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC4:%.*]])
-// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-BE-LABEL: define dso_local void @test1(
+// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-BE-NEXT: [[ENTRY:.*:]]
+// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1]], <16 x i8> [[VC2]], <16 x i8> [[VC3]], <16 x i8> [[VC4]])
+// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
-// CHECK-LE-NOOPT-LABEL: @test1(
-// CHECK-LE-NOOPT-NEXT: entry:
+// CHECK-LE-NOOPT-LABEL: define dso_local void @test1(
+// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-LE-NOOPT-NEXT: [[ENTRY:.*:]]
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
@@ -30,13 +33,13 @@
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64
-// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
-// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VQP]], ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VPP]], ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3]], ptr [[VC3_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4]], ptr [[VC4_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store ptr [[RESP]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
@@ -63,20 +66,23 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
*((__vector_quad *)resp) = res;
}
-// CHECK-LE-LABEL: @test2(
-// CHECK-LE-NEXT: entry:
-// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
-// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-LE-LABEL: define dso_local void @test2(
+// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-LE-NEXT: [[ENTRY:.*:]]
+// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2]], <16 x i8> [[VC1]])
+// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]]
// CHECK-LE-NEXT: ret void
//
-// CHECK-BE-LABEL: @test2(
-// CHECK-BE-NEXT: entry:
-// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]])
-// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-BE-LABEL: define dso_local void @test2(
+// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-BE-NEXT: [[ENTRY:.*:]]
+// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1]], <16 x i8> [[VC2]])
+// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
-// CHECK-LE-NOOPT-LABEL: @test2(
-// CHECK-LE-NOOPT-NEXT: entry:
+// CHECK-LE-NOOPT-LABEL: define dso_local void @test2(
+// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0]] {
+// CHECK-LE-NOOPT-NEXT: [[ENTRY:.*:]]
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
@@ -85,11 +91,11 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32
-// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
-// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
-// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VQP]], ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VPP]], ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store ptr [[RESP]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
@@ -113,3 +119,18 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1,
__builtin_vsx_build_pair(&res, vc1, vc2);
*((__vector_pair *)resp) = res;
}
+//.
+// CHECK-LE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-LE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0}
+// CHECK-LE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-LE: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-LE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-LE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0}
+//.
+// CHECK-BE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-BE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0}
+// CHECK-BE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-BE: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-BE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-BE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
index c66f5e2..f626567 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
@@ -1,17 +1,26 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
// RUN: -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \
-// RUN: -emit-llvm %s -o - | FileCheck %s
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX
-// CHECK-LABEL: @test_dmxvi8gerx4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_dmxvi8gerx4(
+// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]])
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]]
+// AIX-NEXT: ret void
+//
void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -19,13 +28,22 @@ void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_pmdmxvi8gerx4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_pmdmxvi8gerx4(
+// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: ret void
+//
void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -33,14 +51,24 @@ void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigne
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_dmxvi8gerx4pp(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4pp(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_dmxvi8gerx4pp(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: ret void
+//
void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -48,14 +76,24 @@ void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigne
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_pmdmxvi8gerx4pp(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4pp(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_pmdmxvi8gerx4pp(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: ret void
+//
void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -63,14 +101,24 @@ void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_dmxvi8gerx4spp(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4spp(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_dmxvi8gerx4spp(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: ret void
+//
void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -78,14 +126,24 @@ void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsign
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_pmdmxvi8gerx4spp(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4spp(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_pmdmxvi8gerx4spp(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]]
+// AIX-NEXT: ret void
+//
void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
__vector_pair vp = *((__vector_pair *)vpp);
@@ -93,17 +151,30 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
*((__dmr1024 *)resp) = vdmr;
}
-// CHECK-LABEL: @test_dmf_basic(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @test_dmf_basic(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef captures(none) [[RES2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128
-// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128
-// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1]], align 128
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2]], align 128
+// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P]], align 128
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_dmf_basic(
+// AIX-SAME: ptr noundef readonly captures(none) [[P:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef captures(none) [[RES2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1]], align 128
+// AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2]], align 128
+// AIX-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P]], align 128
+// AIX-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
+// AIX-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
+// AIX-NEXT: ret void
+//
void test_dmf_basic(char *p, char *res1, char *res2) {
__dmr1024 x[2];
__builtin_mma_dmsetdmrz(&x[0]);
@@ -111,18 +182,46 @@ void test_dmf_basic(char *p, char *res1, char *res2) {
__builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
}
-// CHECK-LABEL: @test_dmf_basic2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-LABEL: define dso_local void @test_dmf_basic2(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128
-// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1]], align 128
// CHECK-NEXT: ret void
//
+// AIX-LABEL: define void @test_dmf_basic2(
+// AIX-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128
+// AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1]], align 128
+// AIX-NEXT: ret void
+//
void test_dmf_basic2(char *p1, char *res1, char *res2,
vector unsigned char *v) {
vector unsigned char vv = *v;
__builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv);
__builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1);
}
+//.
+// CHECK: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0}
+// CHECK: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+//.
+// AIX: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// AIX: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
+// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// AIX: [[META5]] = !{!"Simple C/C++ TBAA"}
+// AIX: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// AIX: [[META7]] = !{!"__dmr1024", [[META4]], i64 0}
+// AIX: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
index 08ff936..5c7b222 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
@@ -1,13 +1,14 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s
-// CHECK-LABEL: @test1(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @test1(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -18,12 +19,13 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = res;
}
-// CHECK-LABEL: @test2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64
+// CHECK-LABEL: define dso_local void @test2(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0
-// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
+// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
// CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
@@ -39,10 +41,11 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
__builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
}
-// CHECK-LABEL: @test3(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-LABEL: define dso_local void @test3(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]]
// CHECK-NEXT: ret void
//
void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -53,12 +56,13 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_pair *)resp) = res;
}
-// CHECK-LABEL: @test4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
+// CHECK-LABEL: define dso_local void @test4(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
-// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
+// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
// CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
@@ -68,11 +72,12 @@ void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
__builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
}
-// CHECK-LABEL: @test5(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test5(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP0]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -82,11 +87,12 @@ void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test6(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test6(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP0]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -96,10 +102,11 @@ void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test7(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @test7(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -109,10 +116,11 @@ void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test8(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -122,10 +130,11 @@ void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test9(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test9(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -135,10 +144,11 @@ void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test10(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test10(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -148,10 +158,11 @@ void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test11(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test11(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -161,10 +172,11 @@ void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test12(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test12(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -174,10 +186,11 @@ void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test13(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test13(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -187,11 +200,12 @@ void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test14(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test14(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -201,10 +215,11 @@ void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test15(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test15(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -214,10 +229,11 @@ void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test16(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -227,10 +243,11 @@ void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test17(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test17(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -240,10 +257,11 @@ void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test18(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test18(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -253,10 +271,11 @@ void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test19(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test19(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -266,10 +285,11 @@ void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test20(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test20(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -279,11 +299,12 @@ void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test21(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test21(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -293,11 +314,12 @@ void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test22(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test22(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -307,11 +329,12 @@ void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test23(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test23(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -321,11 +344,12 @@ void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test24(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test24(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -335,11 +359,12 @@ void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test25(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test25(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -349,11 +374,12 @@ void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test26(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test26(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -363,11 +389,12 @@ void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test27(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test27(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -377,11 +404,12 @@ void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test28(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test28(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -391,11 +419,12 @@ void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test29(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test29(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -405,11 +434,12 @@ void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test30(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test30(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -419,11 +449,12 @@ void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test31(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test31(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -433,11 +464,12 @@ void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test32(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -447,11 +479,12 @@ void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test33(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test33(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -461,11 +494,12 @@ void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test34(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test34(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -475,11 +509,12 @@ void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test35(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test35(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -489,11 +524,12 @@ void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test36(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test36(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -503,11 +539,12 @@ void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test37(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test37(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -517,11 +554,12 @@ void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test38(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test38(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -531,11 +569,12 @@ void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test39(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test39(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -545,11 +584,12 @@ void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test40(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test40(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -559,11 +599,12 @@ void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test41(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test41(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -573,11 +614,12 @@ void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test42(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test42(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -587,11 +629,12 @@ void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test43(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test43(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -601,11 +644,12 @@ void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test44(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test44(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -615,11 +659,12 @@ void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test45(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test45(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -629,11 +674,12 @@ void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test46(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test46(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -643,11 +689,12 @@ void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test47(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test47(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -657,12 +704,13 @@ void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test48(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test48(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -672,12 +720,13 @@ void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test49(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test49(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -687,12 +736,13 @@ void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test50(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test50(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -702,12 +752,13 @@ void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test51(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test51(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -717,12 +768,13 @@ void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test52(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test52(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -732,12 +784,13 @@ void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test53(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test53(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -747,12 +800,13 @@ void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test54(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test54(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -762,12 +816,13 @@ void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test55(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test55(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -777,10 +832,11 @@ void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test56(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test56(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -790,10 +846,11 @@ void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test57(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test57(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -803,11 +860,12 @@ void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test58(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test58(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -817,11 +875,12 @@ void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test59(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test59(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -831,11 +890,12 @@ void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test60(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test60(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -845,11 +905,12 @@ void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test61(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test61(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -859,11 +920,12 @@ void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test62(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test62(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -873,11 +935,12 @@ void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test63(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test63(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -887,11 +950,12 @@ void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test64(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -901,11 +965,12 @@ void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test65(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test65(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -915,10 +980,11 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test66(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
-// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
+// CHECK-LABEL: define dso_local void @test66(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]])
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2]])
// CHECK-NEXT: ret void
//
void test66(const __vector_pair *vpp, __vector_pair *vp2) {
@@ -926,11 +992,12 @@ void test66(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 0L, vp2);
}
-// CHECK-LABEL: @test67(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
+// CHECK-LABEL: define dso_local void @test67(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], i64 noundef [[OFFSET:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFSET]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 [[OFFSET]]
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -939,11 +1006,12 @@ void test67(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, offset, vp2);
}
-// CHECK-LABEL: @test68(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
+// CHECK-LABEL: define dso_local void @test68(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 18
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 18
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -952,11 +1020,12 @@ void test68(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 18L, vp2);
}
-// CHECK-LABEL: @test69(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
+// CHECK-LABEL: define dso_local void @test69(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 1
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -965,11 +1034,12 @@ void test69(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 1L, vp2);
}
-// CHECK-LABEL: @test70(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
+// CHECK-LABEL: define dso_local void @test70(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 42
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 42
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -978,11 +1048,12 @@ void test70(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 42L, vp2);
}
-// CHECK-LABEL: @test71(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
+// CHECK-LABEL: define dso_local void @test71(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32768
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32768
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -991,11 +1062,12 @@ void test71(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 32768L, vp2);
}
-// CHECK-LABEL: @test72(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
+// CHECK-LABEL: define dso_local void @test72(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32799
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32799
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1004,13 +1076,14 @@ void test72(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_vsx_stxvp(vp, 32799L, vp2);
}
-// CHECK-LABEL: @test73(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
+// CHECK-LABEL: define dso_local void @test73(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1020,12 +1093,13 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test74(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test74(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1035,13 +1109,14 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test75(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
+// CHECK-LABEL: define dso_local void @test75(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1051,10 +1126,11 @@ void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test76(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @test76(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
// CHECK-NEXT: ret void
//
void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1065,12 +1141,13 @@ void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
*((__vector_pair *)resp) = res;
}
-// CHECK-LABEL: @test77(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
+// CHECK-LABEL: define dso_local void @test77(
+// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
-// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
+// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
// CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
@@ -1080,10 +1157,11 @@ void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
__builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
}
-// CHECK-LABEL: @test78(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
-// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
+// CHECK-LABEL: define dso_local void @test78(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]])
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2]])
// CHECK-NEXT: ret void
//
void test78(const __vector_pair *vpp, __vector_pair *vp2) {
@@ -1091,11 +1169,12 @@ void test78(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 0L, vp2);
}
-// CHECK-LABEL: @test79(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
+// CHECK-LABEL: define dso_local void @test79(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], i64 noundef [[OFFSET:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFSET]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 [[OFFSET]]
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1104,11 +1183,12 @@ void test79(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, offset, vp2);
}
-// CHECK-LABEL: @test80(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
+// CHECK-LABEL: define dso_local void @test80(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 18
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 18
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1117,11 +1197,12 @@ void test80(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 18L, vp2);
}
-// CHECK-LABEL: @test81(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
+// CHECK-LABEL: define dso_local void @test81(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 1
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1130,11 +1211,12 @@ void test81(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 1L, vp2);
}
-// CHECK-LABEL: @test82(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
+// CHECK-LABEL: define dso_local void @test82(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 42
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 42
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1143,11 +1225,12 @@ void test82(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 42L, vp2);
}
-// CHECK-LABEL: @test83(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
+// CHECK-LABEL: define dso_local void @test83(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32768
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32768
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1156,11 +1239,12 @@ void test83(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 32768L, vp2);
}
-// CHECK-LABEL: @test84(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
+// CHECK-LABEL: define dso_local void @test84(
+// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32799
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32799
// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
@@ -1169,13 +1253,14 @@ void test84(const __vector_pair *vpp, __vector_pair *vp2) {
__builtin_mma_stxvp(vp, 32799L, vp2);
}
-// CHECK-LABEL: @test85(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
+// CHECK-LABEL: define dso_local void @test85(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
-// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0)
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1185,12 +1270,13 @@ void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test86(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @test86(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1200,13 +1286,14 @@ void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
*((__vector_quad *)resp) = vq;
}
-// CHECK-LABEL: @test87(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
+// CHECK-LABEL: define dso_local void @test87(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
-// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]])
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]]
// CHECK-NEXT: ret void
//
void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1215,3 +1302,11 @@ void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect
__builtin_mma_xvf64gernp(&vq, vp, vc);
*((__vector_quad *)resp) = vq;
}
+//.
+// CHECK: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c
index 45a099d..1f0b3d4 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
// REQUIRES: riscv-registered-target
@@ -53,10 +53,11 @@ DEFINE_STRUCT(bool64)
// bool
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_bool32(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-LABEL: define dso_local <vscale x 2 x i1> @read_bool32(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i8> @llvm.vector.insert.nxv1i8.v1i8(<vscale x 1 x i8> poison, <1 x i8> [[TMP0]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 1 x i8> [[CAST_SCALABLE]] to <vscale x 8 x i1>
// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[TMP1]], i64 0)
@@ -66,23 +67,25 @@ vbool32_t read_bool32(struct struct_bool32 *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_bool32(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv2i1(<vscale x 8 x i1> zeroinitializer, <vscale x 2 x i1> [[X:%.*]], i64 0)
+// CHECK-128-LABEL: define dso_local void @write_bool32(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((1, 2)) [[S:%.*]], <vscale x 2 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv2i1(<vscale x 8 x i1> zeroinitializer, <vscale x 2 x i1> [[X]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i1> [[TMP0]] to <vscale x 1 x i8>
// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8(<vscale x 1 x i8> [[TMP1]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1
-// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1
+// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: ret void
//
void write_bool32(struct struct_bool32 *s, vbool32_t x) {
s->y[0] = x;
}
-// CHECK-128-LABEL: @read_bool64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define dso_local <vscale x 1 x i1> @read_bool64(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i8> @llvm.vector.insert.nxv1i8.v1i8(<vscale x 1 x i8> poison, <1 x i8> [[TMP0]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 1 x i8> [[CAST_SCALABLE]] to <vscale x 8 x i1>
// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.vector.extract.nxv1i1.nxv8i1(<vscale x 8 x i1> [[TMP1]], i64 0)
@@ -92,15 +95,21 @@ vbool64_t read_bool64(struct struct_bool64 *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_bool64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> zeroinitializer, <vscale x 1 x i1> [[X:%.*]], i64 0)
+// CHECK-128-LABEL: define dso_local void @write_bool64(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((1, 2)) [[S:%.*]], <vscale x 1 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> zeroinitializer, <vscale x 1 x i1> [[X]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i1> [[TMP0]] to <vscale x 1 x i8>
// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8(<vscale x 1 x i8> [[TMP1]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1
-// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1
+// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: ret void
//
void write_bool64(struct struct_bool64 *s, vbool64_t x) {
s->y[0] = x;
}
+//.
+// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c
index ecde52e..b92e6df 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
@@ -67,24 +67,27 @@ DEFINE_STRUCT(bool64)
// int64
//===----------------------------------------------------------------------===//
-// CHECK-64-LABEL: @read_int64m1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-64-LABEL: define dso_local <vscale x 1 x i64> @read_int64m1(
+// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v1i64(<vscale x 1 x i64> poison, <1 x i64> [[TMP0]], i64 0)
// CHECK-64-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
//
-// CHECK-128-LABEL: @read_int64m1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-LABEL: define dso_local <vscale x 1 x i64> @read_int64m1(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v2i64(<vscale x 1 x i64> poison, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_int64m1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-256-LABEL: define dso_local <vscale x 1 x i64> @read_int64m1(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
//
@@ -92,25 +95,28 @@ vint64m1_t read_int64m1(struct struct_int64m1 *s) {
return s->y[0];
}
-// CHECK-64-LABEL: @write_int64m1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[X:%.*]], i64 0)
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local void @write_int64m1(
+// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], <vscale x 1 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[X]], i64 0)
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: ret void
//
-// CHECK-128-LABEL: @write_int64m1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64(<vscale x 1 x i64> [[X:%.*]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define dso_local void @write_int64m1(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 1 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64(<vscale x 1 x i64> [[X]], i64 0)
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_int64m1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[X:%.*]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local void @write_int64m1(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 1 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[X]], i64 0)
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) {
@@ -121,24 +127,27 @@ void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) {
// float64
//===----------------------------------------------------------------------===//
-// CHECK-64-LABEL: @read_float64m1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local <vscale x 1 x double> @read_float64m1(
+// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v1f64(<vscale x 1 x double> poison, <1 x double> [[TMP0]], i64 0)
// CHECK-64-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
//
-// CHECK-128-LABEL: @read_float64m1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define dso_local <vscale x 1 x double> @read_float64m1(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v2f64(<vscale x 1 x double> poison, <2 x double> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_float64m1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 1 x double> @read_float64m1(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
//
@@ -146,25 +155,28 @@ vfloat64m1_t read_float64m1(struct struct_float64m1 *s) {
return s->y[0];
}
-// CHECK-64-LABEL: @write_float64m1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64(<vscale x 1 x double> [[X:%.*]], i64 0)
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local void @write_float64m1(
+// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], <vscale x 1 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64(<vscale x 1 x double> [[X]], i64 0)
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: ret void
//
-// CHECK-128-LABEL: @write_float64m1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64(<vscale x 1 x double> [[X:%.*]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define dso_local void @write_float64m1(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 1 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64(<vscale x 1 x double> [[X]], i64 0)
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_float64m1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[X:%.*]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local void @write_float64m1(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 1 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[X]], i64 0)
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) {
@@ -175,26 +187,29 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) {
// bool
//===----------------------------------------------------------------------===//
-// CHECK-64-LABEL: @read_bool1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local <vscale x 64 x i1> @read_bool1(
+// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> poison, <8 x i8> [[TMP0]], i64 0)
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-64-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
-// CHECK-128-LABEL: @read_bool1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define dso_local <vscale x 64 x i1> @read_bool1(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> poison, <16 x i8> [[TMP0]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-128-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
-// CHECK-256-LABEL: @read_bool1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 64 x i1> @read_bool1(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-256-NEXT: ret <vscale x 64 x i1> [[TMP1]]
@@ -203,30 +218,46 @@ vbool1_t read_bool1(struct struct_bool1 *s) {
return s->y[0];
}
-// CHECK-64-LABEL: @write_bool1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-64-LABEL: define dso_local void @write_bool1(
+// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], <vscale x 64 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X]] to <vscale x 8 x i8>
// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
-// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: ret void
//
-// CHECK-128-LABEL: @write_bool1(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-128-LABEL: define dso_local void @write_bool1(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 64 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X]] to <vscale x 8 x i8>
// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_bool1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-256-LABEL: define dso_local void @write_bool1(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 64 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X]] to <vscale x 8 x i8>
// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_bool1(struct struct_bool1 *s, vbool1_t x) {
s->y[0] = x;
}
+//.
+// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
index 0a50e41..4517b52 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s
// REQUIRES: riscv-registered-target
@@ -31,89 +31,100 @@ typedef vbool1_t fixed_bool1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fi
typedef vbool4_t fixed_bool4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen/4)));
typedef vbool32_t fixed_bool32_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen/32)));
-// CHECK-LABEL: @to_vint32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x i32> [[TYPE_COERCE:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 2 x i32> @to_vint32m1_t(
+// CHECK-SAME: <vscale x 2 x i32> noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x i32> [[TYPE_COERCE]]
//
vint32m1_t to_vint32m1_t(fixed_int32m1_t type) {
return type;
}
-// CHECK-LABEL: @from_vint32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x i32> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 2 x i32> @from_vint32m1_t(
+// CHECK-SAME: <vscale x 2 x i32> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x i32> [[TYPE]]
//
fixed_int32m1_t from_vint32m1_t(vint32m1_t type) {
return type;
}
-// CHECK-LABEL: @to_vfloat64m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 1 x double> [[TYPE_COERCE:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 1 x double> @to_vfloat64m1_t(
+// CHECK-SAME: <vscale x 1 x double> noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 1 x double> [[TYPE_COERCE]]
//
vfloat64m1_t to_vfloat64m1_t(fixed_float64m1_t type) {
return type;
}
-// CHECK-LABEL: @from_vfloat64m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 1 x double> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 1 x double> @from_vfloat64m1_t(
+// CHECK-SAME: <vscale x 1 x double> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 1 x double> [[TYPE]]
//
fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) {
return type;
}
-// CHECK-LABEL: @from_vbool1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 64 x i1> @from_vbool1_t(
+// CHECK-SAME: <vscale x 64 x i1> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE]]
//
fixed_bool1_t from_vbool1_t(vbool1_t type) {
return type;
}
-// CHECK-LABEL: @to_vbool1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 64 x i1> @to_vbool1_t(
+// CHECK-SAME: <vscale x 64 x i1> noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0]]
//
vbool1_t to_vbool1_t(fixed_bool1_t type) {
return type;
}
-// CHECK-LABEL: @from_vbool4_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @from_vbool4_t(
+// CHECK-SAME: <vscale x 16 x i1> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TYPE]]
//
fixed_bool4_t from_vbool4_t(vbool4_t type) {
return type;
}
-// CHECK-LABEL: @to_vbool4_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 16 x i1> @to_vbool4_t(
+// CHECK-SAME: <vscale x 16 x i1> noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
vbool4_t to_vbool4_t(fixed_bool4_t type) {
return type;
}
-// CHECK-LABEL: @from_vbool32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x i1> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 2 x i1> @from_vbool32_t(
+// CHECK-SAME: <vscale x 2 x i1> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x i1> [[TYPE]]
//
fixed_bool32_t from_vbool32_t(vbool32_t type) {
return type;
}
-// CHECK-LABEL: @to_vbool32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x i1> [[TMP0:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 2 x i1> @to_vbool32_t(
+// CHECK-SAME: <vscale x 2 x i1> noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x i1> [[TMP0]]
//
vbool32_t to_vbool32_t(fixed_bool32_t type) {
return type;
}
-// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-LABEL: define dso_local <vscale x 2 x i32> @to_vint32m1_t__from_gnu_int32m1_t(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
//
@@ -121,19 +132,21 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
return type;
}
-// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @from_vint32m1_t__to_gnu_int32m1_t(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], <vscale x 2 x i32> [[TYPE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
return type;
}
-// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local <vscale x 2 x i32> @to_fixed_int32m1_t__from_gnu_int32m1_t(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
//
@@ -141,12 +154,18 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
return type;
}
-// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-LABEL: define dso_local void @from_fixed_int32m1_t__to_gnu_int32m1_t(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], <vscale x 2 x i32> noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE]], i64 0)
+// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) {
return type;
}
+//.
+// CHECK: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c
index 92ba27fb..f3b91b2 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
@@ -40,59 +40,66 @@ fixed_bool32_t global_bool32;
// WRITES
//===----------------------------------------------------------------------===//
-// CHECK-64-LABEL: @write_global_i64(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
-// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-64-LABEL: define dso_local void @write_global_i64(
+// CHECK-64-SAME: <vscale x 1 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[V]], i64 0)
+// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-64-NEXT: ret void
//
-// CHECK-256-LABEL: @write_global_i64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
-// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-256-LABEL: define dso_local void @write_global_i64(
+// CHECK-256-SAME: <vscale x 1 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[V]], i64 0)
+// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]]
// CHECK-256-NEXT: ret void
//
void write_global_i64(vint64m1_t v) { global_i64 = v; }
-// CHECK-64-LABEL: @write_global_bool1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-64-LABEL: define dso_local void @write_global_bool1(
+// CHECK-64-SAME: <vscale x 64 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V]] to <vscale x 8 x i8>
// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
-// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA6]]
+// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: ret void
//
-// CHECK-256-LABEL: @write_global_bool1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-256-LABEL: define dso_local void @write_global_bool1(
+// CHECK-256-SAME: <vscale x 64 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V]] to <vscale x 8 x i8>
// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
-// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_global_bool1(vbool1_t v) { global_bool1 = v; }
-// CHECK-64-LABEL: @write_global_bool4(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
+// CHECK-64-LABEL: define dso_local void @write_global_bool4(
+// CHECK-64-SAME: <vscale x 16 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V]] to <vscale x 2 x i8>
// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[TBAA6]]
+// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: ret void
//
-// CHECK-256-LABEL: @write_global_bool4(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
+// CHECK-256-LABEL: define dso_local void @write_global_bool4(
+// CHECK-256-SAME: <vscale x 16 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V]] to <vscale x 2 x i8>
// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_global_bool4(vbool4_t v) { global_bool4 = v; }
#if __riscv_v_fixed_vlen >= 256
-// CHECK-256-LABEL: @write_global_bool32(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv2i1(<vscale x 8 x i1> zeroinitializer, <vscale x 2 x i1> [[V:%.*]], i64 0)
+// CHECK-256-LABEL: define dso_local void @write_global_bool32(
+// CHECK-256-SAME: <vscale x 2 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv2i1(<vscale x 8 x i1> zeroinitializer, <vscale x 2 x i1> [[V]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i1> [[TMP0]] to <vscale x 1 x i8>
// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8(<vscale x 1 x i8> [[TMP1]], i64 0)
-// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: ret void
//
void write_global_bool32(vbool32_t v) { global_bool32 = v; }
@@ -102,46 +109,52 @@ void write_global_bool32(vbool32_t v) { global_bool32 = v; }
// READS
//===----------------------------------------------------------------------===//
-// CHECK-64-LABEL: @read_global_i64(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local <vscale x 1 x i64> @read_global_i64(
+// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v1i64(<vscale x 1 x i64> poison, <1 x i64> [[TMP0]], i64 0)
// CHECK-64-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_global_i64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 1 x i64> @read_global_i64(
+// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
//
vint64m1_t read_global_i64() { return global_i64; }
-// CHECK-64-LABEL: @read_global_bool1(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local <vscale x 64 x i1> @read_global_bool1(
+// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> poison, <8 x i8> [[TMP0]], i64 0)
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-64-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
-// CHECK-256-LABEL: @read_global_bool1(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 64 x i1> @read_global_bool1(
+// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-256-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
vbool1_t read_global_bool1() { return global_bool1; }
-// CHECK-64-LABEL: @read_global_bool4(
-// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[TBAA6]]
+// CHECK-64-LABEL: define dso_local <vscale x 16 x i1> @read_global_bool4(
+// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-64-NEXT: [[ENTRY:.*:]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]]
// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> [[TMP0]], i64 0)
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-64-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
-// CHECK-256-LABEL: @read_global_bool4(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 16 x i1> @read_global_bool4(
+// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-256-NEXT: ret <vscale x 16 x i1> [[TMP1]]
@@ -149,9 +162,10 @@ vbool1_t read_global_bool1() { return global_bool1; }
vbool4_t read_global_bool4() { return global_bool4; }
#if __riscv_v_fixed_vlen >= 256
-// CHECK-256-LABEL: @read_global_bool32(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[TBAA6]]
+// CHECK-256-LABEL: define dso_local <vscale x 2 x i1> @read_global_bool32(
+// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 1 x i8> @llvm.vector.insert.nxv1i8.v1i8(<vscale x 1 x i8> poison, <1 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 1 x i8> [[CAST_SCALABLE]] to <vscale x 8 x i1>
// CHECK-256-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[TMP1]], i64 0)
@@ -159,3 +173,12 @@ vbool4_t read_global_bool4() { return global_bool4; }
//
vbool32_t read_global_bool32() { return global_bool32; }
#endif
+//.
+// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
index 896cef5..d25b8d8 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: systemz-registered-target
// RUN: %clang_cc1 -target-cpu z14 -triple s390x-linux-gnu \
// RUN: -O2 -fzvector -flax-vector-conversions=none \
@@ -14,124 +14,124 @@ volatile vector unsigned long long vul;
// CHECK-LABEL: define dso_local void @test(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT: [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128
// CHECK-NEXT: [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]])
// CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128
// CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128
// CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128
// CHECK-NEXT: [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]])
// CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128
// CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP20]] to i128
// CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128
// CHECK-NEXT: [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]])
// CHECK-NEXT: [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128
// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128
// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]]
// CHECK-NEXT: [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP32]] to i128
// CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128
// CHECK-NEXT: [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]])
// CHECK-NEXT: [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128
// CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128
// CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128
// CHECK-NEXT: [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]])
// CHECK-NEXT: [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128
// CHECK-NEXT: [[TMP50:%.*]] = bitcast <16 x i8> [[TMP47]] to i128
// CHECK-NEXT: [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128
// CHECK-NEXT: [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]])
// CHECK-NEXT: [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]])
// CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]])
// CHECK-NEXT: [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]])
// CHECK-NEXT: [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128
// CHECK-NEXT: [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]])
// CHECK-NEXT: [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128
// CHECK-NEXT: [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0)
// CHECK-NEXT: [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128
// CHECK-NEXT: [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4)
// CHECK-NEXT: [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128
// CHECK-NEXT: [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8)
// CHECK-NEXT: [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128
// CHECK-NEXT: [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12)
// CHECK-NEXT: [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8>
-// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]])
-// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test(void) {
@@ -159,7 +159,7 @@ void test(void) {
vul = vec_bperm_u128(vuc, vuc);
}
//.
-// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
index e3db206..5f3b0ec 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
//
// Test GNU atomic builtins for __int128 aligned to 16 bytes, which should be
@@ -13,21 +13,23 @@ __int128 Val __attribute__((aligned(16)));
__int128 Exp __attribute__((aligned(16)));
__int128 Des __attribute__((aligned(16)));
-// CHECK-LABEL: @f1(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f1(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
__int128 f1() {
return __atomic_load_n(&Ptr, memory_order_seq_cst);
}
-// CHECK-LABEL: @f2(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f2(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 16
-// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f2() {
@@ -35,9 +37,10 @@ __int128 f2() {
return Ret;
}
-// CHECK-LABEL: @f3(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f3(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
// CHECK-NEXT: ret void
//
@@ -45,8 +48,9 @@ void f3() {
__atomic_store_n(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f4(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f4(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
// CHECK-NEXT: ret void
@@ -55,23 +59,25 @@ void f4() {
__atomic_store(&Ptr, &Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f5(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f5(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f5() {
return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f6(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f6(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f6() {
@@ -79,18 +85,19 @@ __int128 f6() {
return Ret;
}
-// CHECK-LABEL: @f7(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local noundef zeroext i1 @f7(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f7() {
@@ -98,18 +105,19 @@ _Bool f7() {
memory_order_seq_cst, memory_order_seq_cst);
}
-// CHECK-LABEL: @f8(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef zeroext i1 @f8(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8() {
@@ -117,141 +125,159 @@ _Bool f8() {
memory_order_seq_cst, memory_order_seq_cst);
}
-// CHECK-LABEL: @f9(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f9(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f9() {
return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f10(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f10(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f10() {
return __atomic_sub_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f11(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f11(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f11() {
return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f12(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f12(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f12() {
return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f13(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f13(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f13() {
return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f14(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f14(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1
-// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f14() {
return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f15(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f15(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f15() {
return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f16() {
return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f17(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f17(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f17() {
return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f18(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f18(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f18() {
return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f19(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f19(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f19() {
return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f20(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f20(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f20() {
return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst);
}
+//.
+// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
index 8759df7..3ac5959 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
//
// Test GNU atomic builtins for __int128 (with default alignment of 8 bytes
@@ -18,21 +18,23 @@ __int128 Des;
// pass. It seems that a 'writable' attribute should now be added to the argument
// in order for this optimization to proceed.
-// CHECK-LABEL: @f1(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f1(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
__int128 f1() {
return __atomic_load_n(&Ptr, memory_order_seq_cst);
}
-// CHECK-LABEL: @f2(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f2(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8
// CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 8
-// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f2() {
@@ -40,9 +42,10 @@ __int128 f2() {
return Ret;
}
-// CHECK-LABEL: @f3(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f3(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8
// CHECK-NEXT: ret void
//
@@ -50,8 +53,9 @@ void f3() {
__atomic_store_n(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f4(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f4(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8
// CHECK-NEXT: ret void
@@ -60,23 +64,25 @@ void f4() {
__atomic_store(&Ptr, &Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f5(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f5(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f5() {
return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f6(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f6(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f6() {
@@ -84,18 +90,19 @@ __int128 f6() {
return Ret;
}
-// CHECK-LABEL: @f7(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local noundef zeroext i1 @f7(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 8
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f7() {
@@ -103,18 +110,19 @@ _Bool f7() {
memory_order_seq_cst, memory_order_seq_cst);
}
-// CHECK-LABEL: @f8(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef zeroext i1 @f8(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 8
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8() {
@@ -122,141 +130,159 @@ _Bool f8() {
memory_order_seq_cst, memory_order_seq_cst);
}
-// CHECK-LABEL: @f9(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f9(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f9() {
return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f10(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f10(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f10() {
return __atomic_sub_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f11(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f11(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f11() {
return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f12(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f12(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f12() {
return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f13(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f13(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f13() {
return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f14(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f14(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1
-// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f14() {
return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f15(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f15(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f15() {
return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f16() {
return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f17(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f17(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f17() {
return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f18(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f18(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f18() {
return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f19(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f19(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f19() {
return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst);
}
-// CHECK-LABEL: @f20(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f20(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f20() {
return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst);
}
+//.
+// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
index e80f2b6..601bd7f 100644
--- a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
+++ b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s
//
@@ -10,149 +10,162 @@ __int128 Ptr __attribute__((aligned(16)));
__int128 Val __attribute__((aligned(16)));
__int128 OldVal __attribute__((aligned(16)));
-// CHECK-LABEL: @f1(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @f1(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f1() {
return __sync_fetch_and_add(&Ptr, Val);
}
-// CHECK-LABEL: @f2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f2(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f2() {
return __sync_fetch_and_sub(&Ptr, Val);
}
-// CHECK-LABEL: @f3(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f3(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f3() {
return __sync_fetch_and_or(&Ptr, Val);
}
-// CHECK-LABEL: @f4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f4(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f4() {
return __sync_fetch_and_and(&Ptr, Val);
}
-// CHECK-LABEL: @f5(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f5(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f5() {
return __sync_fetch_and_xor(&Ptr, Val);
}
-// CHECK-LABEL: @f6(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f6(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f6() {
return __sync_fetch_and_nand(&Ptr, Val);
}
-// CHECK-LABEL: @f7(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f7(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f7() {
return __sync_add_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f8() {
return __sync_sub_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f9(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f9(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f9() {
return __sync_or_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f10(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f10(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f10() {
return __sync_and_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f11(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f11(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f11() {
return __sync_xor_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f12(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f12(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1
-// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f12() {
return __sync_nand_and_fetch(&Ptr, Val);
}
-// CHECK-LABEL: @f13(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local zeroext i1 @f13(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
// CHECK-NEXT: ret i1 [[TMP3]]
@@ -161,32 +174,35 @@ _Bool f13() {
return __sync_bool_compare_and_swap(&Ptr, OldVal, Val);
}
-// CHECK-LABEL: @f14(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f14(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f14() {
return __sync_val_compare_and_swap(&Ptr, OldVal, Val);
}
-// CHECK-LABEL: @f15(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f15(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f15() {
return __sync_lock_test_and_set(&Ptr, Val);
}
-// CHECK-LABEL: @f16(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f16(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: store atomic i128 0, ptr @Ptr release, align 16
// CHECK-NEXT: ret void
//
@@ -194,11 +210,12 @@ void f16() {
return __sync_lock_release(&Ptr);
}
-// CHECK-LABEL: @f17(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @f17(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
-// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f17() {
@@ -206,14 +223,21 @@ __int128 f17() {
}
// Test that a statement expression compiles.
-// CHECK-LABEL: @f18(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @f18(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i128, align 8
-// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[__INT128_TBAA2]]
+// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[__INT128_TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[T_ADDR]], i128 [[T]], i128 [[T]] seq_cst seq_cst, align 16
// CHECK-NEXT: ret void
//
void f18(__int128 t) {
__sync_bool_compare_and_swap(({int x = 1; &t;}), t, t);
}
+//.
+// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c
index b021ae8..f00fcdd 100644
--- a/clang/test/CodeGen/SystemZ/zvector2.c
+++ b/clang/test/CodeGen/SystemZ/zvector2.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple s390x-linux-gnu -target-cpu z14 -fzvector \
// RUN: -O -emit-llvm -o - -W -Wall -Werror %s | FileCheck %s
@@ -8,8 +8,8 @@ volatile vector bool int bi;
// CHECK-LABEL: define dso_local void @test_assign(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]]
+// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_assign (void)
@@ -20,8 +20,8 @@ void test_assign (void)
// CHECK-LABEL: define dso_local void @test_pos(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_pos (void)
@@ -32,9 +32,9 @@ void test_pos (void)
// CHECK-LABEL: define dso_local void @test_neg(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[TMP0]]
-// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_neg (void)
@@ -45,9 +45,9 @@ void test_neg (void)
// CHECK-LABEL: define dso_local void @test_preinc(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00)
-// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_preinc (void)
@@ -58,9 +58,9 @@ void test_preinc (void)
// CHECK-LABEL: define dso_local void @test_postinc(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00)
-// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_postinc (void)
@@ -71,9 +71,9 @@ void test_postinc (void)
// CHECK-LABEL: define dso_local void @test_predec(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00)
-// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_predec (void)
@@ -84,9 +84,9 @@ void test_predec (void)
// CHECK-LABEL: define dso_local void @test_postdec(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00)
-// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_postdec (void)
@@ -97,10 +97,10 @@ void test_postdec (void)
// CHECK-LABEL: define dso_local void @test_add(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_add (void)
@@ -111,10 +111,10 @@ void test_add (void)
// CHECK-LABEL: define dso_local void @test_add_assign(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_add_assign (void)
@@ -125,10 +125,10 @@ void test_add_assign (void)
// CHECK-LABEL: define dso_local void @test_sub(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_sub (void)
@@ -139,10 +139,10 @@ void test_sub (void)
// CHECK-LABEL: define dso_local void @test_sub_assign(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_sub_assign (void)
@@ -153,10 +153,10 @@ void test_sub_assign (void)
// CHECK-LABEL: define dso_local void @test_mul(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_mul (void)
@@ -167,10 +167,10 @@ void test_mul (void)
// CHECK-LABEL: define dso_local void @test_mul_assign(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_mul_assign (void)
@@ -181,10 +181,10 @@ void test_mul_assign (void)
// CHECK-LABEL: define dso_local void @test_div(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_div (void)
@@ -195,10 +195,10 @@ void test_div (void)
// CHECK-LABEL: define dso_local void @test_div_assign(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP0]]
-// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_div_assign (void)
@@ -209,11 +209,11 @@ void test_div_assign (void)
// CHECK-LABEL: define dso_local void @test_cmpeq(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmpeq (void)
@@ -224,11 +224,11 @@ void test_cmpeq (void)
// CHECK-LABEL: define dso_local void @test_cmpne(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmpne (void)
@@ -239,11 +239,11 @@ void test_cmpne (void)
// CHECK-LABEL: define dso_local void @test_cmpge(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmpge (void)
@@ -254,11 +254,11 @@ void test_cmpge (void)
// CHECK-LABEL: define dso_local void @test_cmpgt(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmpgt (void)
@@ -269,11 +269,11 @@ void test_cmpgt (void)
// CHECK-LABEL: define dso_local void @test_cmple(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmple (void)
@@ -284,11 +284,11 @@ void test_cmple (void)
// CHECK-LABEL: define dso_local void @test_cmplt(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]]
// CHECK-NEXT: ret void
//
void test_cmplt (void)
@@ -297,7 +297,7 @@ void test_cmplt (void)
}
//.
-// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index 6a69627..c08ef76 100644..100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -10,6 +10,7 @@
#include <immintrin.h>
+#include "builtin_test_helpers.h"
float test_cvtsh_ss(unsigned short a) {
// CHECK-LABEL: test_cvtsh_ss
@@ -18,6 +19,10 @@ float test_cvtsh_ss(unsigned short a) {
return _cvtsh_ss(a);
}
+TEST_CONSTEXPR(_cvtsh_ss(0x0000) == 0.0f);
+TEST_CONSTEXPR(_cvtsh_ss(0x4500) == 5.0f);
+TEST_CONSTEXPR(_cvtsh_ss(0xC000) == -2.0f);
+
unsigned short test_cvtss_sh(float a) {
// CHECK-LABEL: test_cvtss_sh
// CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
@@ -29,6 +34,11 @@ unsigned short test_cvtss_sh(float a) {
return _cvtss_sh(a, 0);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0, 0, 0, 0)),
+ 1.0f, 2.0f, 3.0f, 4.0f
+));
+
__m128 test_mm_cvtph_ps(__m128i a) {
// CHECK-LABEL: test_mm_cvtph_ps
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -41,6 +51,10 @@ __m256 test_mm256_cvtph_ps(__m128i a) {
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
return _mm256_cvtph_ps(a);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
+ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
+));
__m128i test_mm_cvtps_ph(__m128 a) {
// CHECK-LABEL: test_mm_cvtps_ph
diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c
index 6de7676..8d30e29 100644
--- a/clang/test/CodeGen/allow-ubsan-check.c
+++ b/clang/test/CodeGen/allow-ubsan-check.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
//
// We can't use -fsanitize-skip-hot-cutoff because that includes both -ubsan-guard-checks and
//-lower-allow-check-percentile-cutoff.
@@ -98,7 +98,7 @@ int div(int x, int y) {
// CHECK-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]]
// CHECK-NEXT: unreachable, !nosanitize [[META2]]
// CHECK: [[CONT]]:
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
// CHECK-NEXT: ret i32 [[TMP2]]
//
// TR-LABEL: define dso_local i32 @null(
@@ -112,7 +112,7 @@ int div(int x, int y) {
// TR-NEXT: tail call void @llvm.ubsantrap(i8 22) #[[ATTR7]], !nosanitize [[META2]]
// TR-NEXT: unreachable, !nosanitize [[META2]]
// TR: [[CONT]]:
-// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]]
+// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
// TR-NEXT: ret i32 [[TMP2]]
//
// REC-LABEL: define dso_local i32 @null(
@@ -126,7 +126,7 @@ int div(int x, int y) {
// REC-NEXT: tail call void @__ubsan_handle_type_mismatch_v1(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]]
// REC-NEXT: br label %[[CONT]], !nosanitize [[META2]]
// REC: [[CONT]]:
-// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]]
+// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
// REC-NEXT: ret i32 [[TMP2]]
//
int null(int* x) {
@@ -205,7 +205,7 @@ void use(double*);
// CHECK-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]]
// CHECK: [[BB4]]:
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]]
-// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]]
// CHECK-NEXT: ret double [[TMP5]]
// CHECK: [[TRAP]]:
// CHECK-NEXT: call void @__ubsan_handle_local_out_of_bounds_abort() #[[ATTR6]], !nosanitize [[META2]]
@@ -224,7 +224,7 @@ void use(double*);
// TR-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]]
// TR: [[BB4]]:
// TR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]]
-// TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]]
// TR-NEXT: ret double [[TMP5]]
// TR: [[TRAP]]:
// TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR7]], !nosanitize [[META2]]
@@ -243,7 +243,7 @@ void use(double*);
// REC-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]]
// REC: [[BB4]]:
// REC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]]
-// REC-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// REC-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]]
// REC-NEXT: ret double [[TMP5]]
// REC: [[TRAP]]:
// REC-NEXT: call void @__ubsan_handle_local_out_of_bounds() #[[ATTR6]], !nosanitize [[META2]]
@@ -259,30 +259,30 @@ double lbounds(int b, int i) {
// CHECK: [[META2]] = !{}
// CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
// CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575}
-// CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
// CHECK: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0}
// CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
// CHECK: [[META8]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// CHECK: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
// CHECK: [[META10]] = !{!"double", [[META7]], i64 0}
//.
// TR: [[META2]] = !{}
// TR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
// TR: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575}
-// TR: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// TR: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
// TR: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0}
// TR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
// TR: [[META8]] = !{!"Simple C/C++ TBAA"}
-// TR: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// TR: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
// TR: [[META10]] = !{!"double", [[META7]], i64 0}
//.
// REC: [[META2]] = !{}
// REC: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
// REC: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575}
-// REC: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// REC: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
// REC: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0}
// REC: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
// REC: [[META8]] = !{!"Simple C/C++ TBAA"}
-// REC: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// REC: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
// REC: [[META10]] = !{!"double", [[META7]], i64 0}
//.
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
index c5a4101..847ce67f 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
@@ -28,24 +28,27 @@ DEFINE_STRUCT(bool)
// int64
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_int64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-128-LABEL: define dso_local <vscale x 2 x i64> @read_int64(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_int64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-256-LABEL: define dso_local <vscale x 2 x i64> @read_int64(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
-// CHECK-512-LABEL: @read_int64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-512-LABEL: define dso_local <vscale x 2 x i64> @read_int64(
+// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
@@ -53,25 +56,28 @@ svint64_t read_int64(struct struct_int64 *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_int64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local void @write_int64(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 2 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[X]], i64 0)
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_int64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local void @write_int64(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 2 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[X]], i64 0)
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: ret void
//
-// CHECK-512-LABEL: @write_int64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local void @write_int64(
+// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], <vscale x 2 x i64> [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[X]], i64 0)
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_int64(struct struct_int64 *s, svint64_t x) {
@@ -82,24 +88,27 @@ void write_int64(struct struct_int64 *s, svint64_t x) {
// float64
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_float64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local <vscale x 2 x double> @read_float64(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x double> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_float64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local <vscale x 2 x double> @read_float64(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x double> [[CAST_SCALABLE]]
//
-// CHECK-512-LABEL: @read_float64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local <vscale x 2 x double> @read_float64(
+// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x double> [[CAST_SCALABLE]]
//
@@ -107,25 +116,28 @@ svfloat64_t read_float64(struct struct_float64 *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_float64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local void @write_float64(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[X]], i64 0)
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_float64(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local void @write_float64(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[X]], i64 0)
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: ret void
//
-// CHECK-512-LABEL: @write_float64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local void @write_float64(
+// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], <vscale x 2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X]], i64 0)
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_float64(struct struct_float64 *s, svfloat64_t x) {
@@ -136,24 +148,27 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) {
// bfloat16
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_bfloat16(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local <vscale x 8 x bfloat> @read_bfloat16(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CAST_SCALABLE]]
//
-// CHECK-256-LABEL: @read_bfloat16(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local <vscale x 8 x bfloat> @read_bfloat16(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 8 x bfloat> [[CAST_SCALABLE]]
//
-// CHECK-512-LABEL: @read_bfloat16(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local <vscale x 8 x bfloat> @read_bfloat16(
+// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> poison, <32 x bfloat> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CAST_SCALABLE]]
//
@@ -161,25 +176,28 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_bfloat16(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local void @write_bfloat16(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], <vscale x 8 x bfloat> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[X]], i64 0)
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16
+// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_bfloat16(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local void @write_bfloat16(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], <vscale x 8 x bfloat> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[X]], i64 0)
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32
+// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: ret void
//
-// CHECK-512-LABEL: @write_bfloat16(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64
-// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local void @write_bfloat16(
+// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], <vscale x 8 x bfloat> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[X]], i64 0)
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64
+// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
@@ -190,26 +208,29 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
// bool
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_bool(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 2
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-128-LABEL: define dso_local <vscale x 16 x i1> @read_bool(
+// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> [[TMP0]], i64 0)
// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
-// CHECK-256-LABEL: @read_bool(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 4
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-256-LABEL: define dso_local <vscale x 16 x i1> @read_bool(
+// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-256-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
-// CHECK-512-LABEL: @read_bool(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-512-LABEL: define dso_local <vscale x 16 x i1> @read_bool(
+// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP0]], i64 0)
// CHECK-512-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
@@ -218,30 +239,46 @@ svbool_t read_bool(struct struct_bool *s) {
return s->y[0];
}
-// CHECK-128-LABEL: @write_bool(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
+// CHECK-128-LABEL: define dso_local void @write_bool(
+// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((2, 4)) [[S:%.*]], <vscale x 16 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X]] to <vscale x 2 x i8>
// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 2
-// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2
+// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-256-LABEL: @write_bool(
-// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
+// CHECK-256-LABEL: define dso_local void @write_bool(
+// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((4, 8)) [[S:%.*]], <vscale x 16 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-256-NEXT: [[ENTRY:.*:]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X]] to <vscale x 2 x i8>
// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 4
-// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4
+// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-256-NEXT: ret void
//
-// CHECK-512-LABEL: @write_bool(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
+// CHECK-512-LABEL: define dso_local void @write_bool(
+// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], <vscale x 16 x i1> [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X]] to <vscale x 2 x i8>
// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8
-// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
+// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8
+// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_bool(struct struct_bool *s, svbool_t x) {
s->y[0] = x;
}
+//.
+// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-256: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-256: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK-256: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
index fcd4314..bdaebf7 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s
// REQUIRES: aarch64-registered-target
@@ -12,66 +12,74 @@ typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8)));
-// CHECK-LABEL: @to_svint32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TYPE_COERCE:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 4 x i32> @to_svint32_t(
+// CHECK-SAME: <vscale x 4 x i32> noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TYPE_COERCE]]
//
svint32_t to_svint32_t(fixed_int32_t type) {
return type;
}
-// CHECK-LABEL: @from_svint32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @from_svint32_t(
+// CHECK-SAME: <vscale x 4 x i32> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TYPE]]
//
fixed_int32_t from_svint32_t(svint32_t type) {
return type;
}
-// CHECK-LABEL: @to_svfloat64_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x double> [[TYPE_COERCE:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 2 x double> @to_svfloat64_t(
+// CHECK-SAME: <vscale x 2 x double> noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x double> [[TYPE_COERCE]]
//
svfloat64_t to_svfloat64_t(fixed_float64_t type) {
return type;
}
-// CHECK-LABEL: @from_svfloat64_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 2 x double> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @from_svfloat64_t(
+// CHECK-SAME: <vscale x 2 x double> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 2 x double> [[TYPE]]
//
fixed_float64_t from_svfloat64_t(svfloat64_t type) {
return type;
}
-// CHECK-LABEL: @to_svbool_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0:%.*]]
+// CHECK-LABEL: define dso_local noundef <vscale x 16 x i1> @to_svbool_t(
+// CHECK-SAME: <vscale x 16 x i1> noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t to_svbool_t(fixed_bool_t type) {
return type;
}
-// CHECK-LABEL: @from_svbool_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TYPE:%.*]]
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @from_svbool_t(
+// CHECK-SAME: <vscale x 16 x i1> returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TYPE]]
//
fixed_bool_t from_svbool_t(svbool_t type) {
return type;
}
-// CHECK-LABEL: @lax_cast(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[TYPE_COERCE:%.*]] to <vscale x 2 x i64>
+// CHECK-LABEL: define dso_local noundef <vscale x 2 x i64> @lax_cast(
+// CHECK-SAME: <vscale x 4 x i32> noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[TYPE_COERCE]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t lax_cast(fixed_int32_t type) {
return type;
}
-// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @to_svint32_t__from_gnu_int32_t(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
@@ -79,19 +87,21 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
return type;
}
-// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @from_svint32_t__to_gnu_int32_t(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], <vscale x 4 x i32> [[TYPE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
return type;
}
-// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @to_fixed_int32_t__from_gnu_int32_t(
+// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
@@ -99,12 +109,18 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
return type;
}
-// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @from_fixed_int32_t__to_gnu_int32_t(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], <vscale x 4 x i32> noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE]], i64 0)
+// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {
return type;
}
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
index 011518c6..b604a06 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512
@@ -20,46 +20,52 @@ fixed_bool_t global_bool;
// WRITES
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @write_global_i64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
-// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-LABEL: define void @write_global_i64(
+// CHECK-128-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[V]], i64 0)
+// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-128-NEXT: ret void
//
-// CHECK-512-LABEL: @write_global_i64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
-// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-512-LABEL: define void @write_global_i64(
+// CHECK-512-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[V]], i64 0)
+// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-512-NEXT: ret void
//
void write_global_i64(svint64_t v) { global_i64 = v; }
-// CHECK-128-LABEL: @write_global_bf16(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
-// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], ptr @global_bf16, align 16, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define void @write_global_bf16(
+// CHECK-128-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[V]], i64 0)
+// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-512-LABEL: @write_global_bf16(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
-// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], ptr @global_bf16, align 16, !tbaa [[TBAA6]]
+// CHECK-512-LABEL: define void @write_global_bf16(
+// CHECK-512-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[V]], i64 0)
+// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_global_bf16(svbfloat16_t v) { global_bf16 = v; }
-// CHECK-128-LABEL: @write_global_bool(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
-// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-128-NEXT: store <2 x i8> [[CASTFIXEDSVE]], ptr @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-LABEL: define void @write_global_bool(
+// CHECK-128-SAME: <vscale x 16 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V]] to <vscale x 2 x i8>
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-128-NEXT: ret void
//
-// CHECK-512-LABEL: @write_global_bool(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
-// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-512-NEXT: store <8 x i8> [[CASTFIXEDSVE]], ptr @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-LABEL: define void @write_global_bool(
+// CHECK-512-SAME: <vscale x 16 x i1> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V]] to <vscale x 2 x i8>
+// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]]
// CHECK-512-NEXT: ret void
//
void write_global_bool(svbool_t v) { global_bool = v; }
@@ -68,46 +74,61 @@ void write_global_bool(svbool_t v) { global_bool = v; }
// READS
//===----------------------------------------------------------------------===//
-// CHECK-128-LABEL: @read_global_i64(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP0]], i64 0)
-// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+// CHECK-128-LABEL: define <vscale x 2 x i64> @read_global_i64(
+// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP0]], i64 0)
+// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
-// CHECK-512-LABEL: @read_global_i64(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[TMP0]], i64 0)
-// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+// CHECK-512-LABEL: define <vscale x 2 x i64> @read_global_i64(
+// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[TMP0]], i64 0)
+// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
svint64_t read_global_i64() { return global_i64; }
-// CHECK-128-LABEL: @read_global_bf16(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> [[TMP0]], i64 0)
-// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
+// CHECK-128-LABEL: define <vscale x 8 x bfloat> @read_global_bf16(
+// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> [[TMP0]], i64 0)
+// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CAST_SCALABLE]]
//
-// CHECK-512-LABEL: @read_global_bf16(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> poison, <32 x bfloat> [[TMP0]], i64 0)
-// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
+// CHECK-512-LABEL: define <vscale x 8 x bfloat> @read_global_bf16(
+// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> poison, <32 x bfloat> [[TMP0]], i64 0)
+// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CAST_SCALABLE]]
//
svbfloat16_t read_global_bf16() { return global_bf16; }
-// CHECK-128-LABEL: @read_global_bool(
-// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> [[TMP0]], i64 0)
-// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-128-LABEL: define <vscale x 16 x i1> @read_global_bool(
+// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-128-NEXT: [[ENTRY:.*:]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]]
+// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
-// CHECK-512-LABEL: @read_global_bool(
-// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP0]], i64 0)
-// CHECK-512-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-512-LABEL: define <vscale x 16 x i1> @read_global_bool(
+// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-512-NEXT: [[ENTRY:.*:]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]]
+// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT: [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t read_global_bool() { return global_bool; }
+//.
+// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
+// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/attr-counted-by-for-pointers.c b/clang/test/CodeGen/attr-counted-by-for-pointers.c
index 0d72b58..f7b737d 100644
--- a/clang/test/CodeGen/attr-counted-by-for-pointers.c
+++ b/clang/test/CodeGen/attr-counted-by-for-pointers.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -DWITH_ATTRS -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITH-ATTR %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -DWITH_ATTRS -Wall -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITH-ATTR %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITHOUT-ATTR %s
@@ -29,51 +29,51 @@ struct annotated_ptr {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont10:
+// SANITIZE-WITH-ATTR: [[CONT10]]:
// SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test1(struct annotated_ptr *p, int index, struct foo *value) {
@@ -82,51 +82,51 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont10:
+// SANITIZE-WITH-ATTR: [[CONT10]]:
// SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13]]
+// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test2(struct annotated_ptr *p, int index, struct foo *value) {
@@ -135,51 +135,51 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT10:%.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT10:.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont10:
+// SANITIZE-WITH-ATTR: [[CONT10]]:
// SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13]]
+// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test3(struct annotated_ptr *p, int index, struct foo *value) {
@@ -188,7 +188,7 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test4(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -199,7 +199,7 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test4(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -210,12 +210,12 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test4(struct annotated_ptr *p) {
@@ -224,7 +224,7 @@ size_t test4(struct annotated_ptr *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test5(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -235,7 +235,7 @@ size_t test4(struct annotated_ptr *p) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test5(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -246,12 +246,12 @@ size_t test4(struct annotated_ptr *p) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test5(struct annotated_ptr *p, int index) {
@@ -260,17 +260,17 @@ size_t test5(struct annotated_ptr *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 17179869177) i64 @test6(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont8:
+// SANITIZE-WITH-ATTR: [[CONT8]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
@@ -279,7 +279,7 @@ size_t test5(struct annotated_ptr *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -34359738360, 34359738361) i64 @test6(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -294,12 +294,12 @@ size_t test5(struct annotated_ptr *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test6(struct annotated_ptr *p, int index) {
@@ -308,32 +308,32 @@ size_t test6(struct annotated_ptr *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont10:
+// SANITIZE-WITH-ATTR: [[CONT10]]:
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test7(struct annotated_ptr *p, int index) {
@@ -348,7 +348,7 @@ struct annotated_sized_ptr {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test8(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
@@ -357,7 +357,7 @@ struct annotated_sized_ptr {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test8(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
@@ -366,12 +366,12 @@ struct annotated_sized_ptr {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test8(struct annotated_sized_ptr *p, int index) {
@@ -380,17 +380,17 @@ size_t test8(struct annotated_sized_ptr *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test9(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont8:
+// SANITIZE-WITH-ATTR: [[CONT8]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.smax.i64(i64 [[RESULT]], i64 0)
@@ -398,7 +398,7 @@ size_t test8(struct annotated_sized_ptr *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -4294967295, 4294967296) i64 @test9(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -412,12 +412,12 @@ size_t test8(struct annotated_sized_ptr *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test9(struct annotated_sized_ptr *p, int index) {
@@ -426,17 +426,17 @@ size_t test9(struct annotated_sized_ptr *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont8:
+// SANITIZE-WITH-ATTR: [[CONT8]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[INDEX_SIZE:%.*]] = shl nuw nsw i64 [[IDXPROM]], 2
// SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = sub nsw i64 [[COUNT]], [[INDEX_SIZE]]
@@ -445,7 +445,7 @@ size_t test9(struct annotated_sized_ptr *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418236, 10737418240) i64 @test10(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -460,12 +460,12 @@ size_t test9(struct annotated_sized_ptr *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test10(struct annotated_sized_ptr *p, int index) {
@@ -479,7 +479,7 @@ struct pr151236_struct {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
@@ -490,7 +490,7 @@ struct pr151236_struct {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
@@ -501,12 +501,12 @@ struct pr151236_struct {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2
//
size_t test11(struct pr151236_struct *p) {
@@ -515,7 +515,7 @@ size_t test11(struct pr151236_struct *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test12(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
@@ -526,7 +526,7 @@ size_t test11(struct pr151236_struct *p) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test12(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
@@ -537,14 +537,66 @@ size_t test11(struct pr151236_struct *p) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2
//
size_t test12(struct pr151236_struct *p) {
return __bdos(p->a) + __bdos(((int *)p->a));
}
+//.
+// SANITIZE-WITH-ATTR: [[META2]] = !{}
+// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
+// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META9:![0-9]+]], i64 8}
+// SANITIZE-WITH-ATTR: [[META5]] = !{!"annotated_ptr", [[META6:![0-9]+]], i64 0, [[META9]], i64 8, [[META12:![0-9]+]], i64 16}
+// SANITIZE-WITH-ATTR: [[META6]] = !{!"long", [[META7:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META8]] = !{!"Simple C/C++ TBAA"}
+// SANITIZE-WITH-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META7]], i64 0}
+// SANITIZE-WITH-ATTR: [[META12]] = !{!"int", [[META7]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[META14]] = !{!"p1 _ZTS3foo", [[META11]], i64 0}
+// SANITIZE-WITH-ATTR: [[PROF15]] = !{!"branch_weights", i32 1, i32 1048575}
+//.
+// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
+// NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16}
+// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"}
+// NO-SANITIZE-WITH-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"int", [[META5]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0}
+//.
+// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
+// SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16}
+// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"}
+// SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0}
+//.
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
+// NO-SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16}
+// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"}
+// NO-SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0}
+//.
diff --git a/clang/test/CodeGen/attr-counted-by-pr110385.c b/clang/test/CodeGen/attr-counted-by-pr110385.c
index 412c12c..32ee1c8 100644
--- a/clang/test/CodeGen/attr-counted-by-pr110385.c
+++ b/clang/test/CodeGen/attr-counted-by-pr110385.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wno-missing-declarations -emit-llvm -o - %s | FileCheck %s
// See #110385
@@ -27,17 +27,17 @@ void init(void * __attribute__((pass_dynamic_object_size(0))));
// CHECK-LABEL: define dso_local void @test1(
// CHECK-SAME: ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[GROWABLE:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[_ZTS8VARIABLEPTR_TBAA2:![0-9]+]]
// CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
// CHECK-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
// CHECK-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
-// CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 1
-// CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1
-// CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 0
-// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP4]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
+// CHECK-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1
+// CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0
+// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP2]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
void test1(struct bucket *foo) {
@@ -46,22 +46,22 @@ void test1(struct bucket *foo) {
// CHECK-LABEL: define dso_local void @test2(
// CHECK-SAME: ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 16
// CHECK-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 12
// CHECK-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
-// CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1
-// CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
-// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP3]]) #[[ATTR2]]
+// CHECK-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
+// CHECK-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1
+// CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0
+// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP1]]) #[[ATTR2]]
// CHECK-NEXT: ret void
//
void test2(struct bucket2 *foo) {
init(foo->growable.array);
}
//.
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
+// CHECK: [[_ZTS8VARIABLEPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
// CHECK: [[META3]] = !{!"bucket", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META4]], i64 16}
// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index cb23efd..9675fe2 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITH-ATTR %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITH-ATTR %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITHOUT-ATTR %s
@@ -60,47 +60,47 @@ struct anon_struct {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test1(struct annotated *p, int index, int val) {
@@ -109,49 +109,49 @@ void test1(struct annotated *p, int index, int val) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont6:
+// SANITIZE-WITH-ATTR: [[CONT6]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP2]], 2
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP0]], 2
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test2(struct annotated *p, size_t index) {
@@ -160,7 +160,7 @@ void test2(struct annotated *p, size_t index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -171,7 +171,7 @@ void test2(struct annotated *p, size_t index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -182,12 +182,12 @@ void test2(struct annotated *p, size_t index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test2_bdos(struct annotated *p) {
@@ -196,7 +196,7 @@ size_t test2_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos_cast(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -207,7 +207,7 @@ size_t test2_bdos(struct annotated *p) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos_cast(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -218,12 +218,12 @@ size_t test2_bdos(struct annotated *p) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos_cast(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos_cast(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test2_bdos_cast(struct annotated *p) {
@@ -232,43 +232,43 @@ size_t test2_bdos_cast(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test3(struct annotated *p, size_t index) {
@@ -279,22 +279,22 @@ void test3(struct annotated *p, size_t index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test3_bdos(struct annotated *p) {
@@ -303,22 +303,22 @@ size_t test3_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos_cast(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos_cast(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos_cast(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos_cast(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test3_bdos_cast(struct annotated *p) {
@@ -327,68 +327,68 @@ size_t test3_bdos_cast(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 2
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[DOTCOUNTED_BY_LOAD]], 2
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT12:%.*]], label [[HANDLER_OUT_OF_BOUNDS8:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds8:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT12:.*]], label %[[HANDLER_OUT_OF_BOUNDS8:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS8]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont12:
+// SANITIZE-WITH-ATTR: [[CONT12]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 2
// SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 244
// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = and i32 [[RESULT]], 252
// SANITIZE-WITH-ATTR-NEXT: [[CONV2:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 0
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT81:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 3
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label [[HANDLER_OUT_OF_BOUNDS18:%.*]], label [[CONT19:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds18:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label %[[HANDLER_OUT_OF_BOUNDS18:.*]], label %[[CONT19:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS18]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont19:
+// SANITIZE-WITH-ATTR: [[CONT19]]:
// SANITIZE-WITH-ATTR-NEXT: [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM31:%.*]] = sext i32 [[ADD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = icmp ult i64 [[IDXPROM31]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label [[CONT38:%.*]], label [[HANDLER_OUT_OF_BOUNDS34:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds34:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label %[[CONT38:.*]], label %[[HANDLER_OUT_OF_BOUNDS34:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS34]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont38:
+// SANITIZE-WITH-ATTR: [[CONT38]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 3
// SANITIZE-WITH-ATTR-NEXT: [[RESULT25:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 240
// SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = and i32 [[RESULT25]], 252
// SANITIZE-WITH-ATTR-NEXT: [[CONV27:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 0
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM31]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM42:%.*]] = sext i32 [[FAM_IDX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD44:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD44]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM42]], [[TMP8]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS45:%.*]], label [[CONT46:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds45:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS45:.*]], label %[[CONT46:.*]], !prof [[PROF8]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS45]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont46:
+// SANITIZE-WITH-ATTR: [[CONT46]]:
// SANITIZE-WITH-ATTR-NEXT: [[ADD59:%.*]] = add nsw i32 [[INDEX]], 2
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM60:%.*]] = sext i32 [[ADD59]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = icmp ult i64 [[IDXPROM60]], [[TMP8]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label [[CONT67:%.*]], label [[HANDLER_OUT_OF_BOUNDS63:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds63:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label %[[CONT67:.*]], label %[[HANDLER_OUT_OF_BOUNDS63:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS63]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont67:
+// SANITIZE-WITH-ATTR: [[CONT67]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM60]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNT50:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD44]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP10:%.*]] = sub nsw i64 [[COUNT50]], [[IDXPROM42]]
@@ -396,12 +396,12 @@ size_t test3_bdos_cast(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: [[DOTTR:%.*]] = trunc nuw nsw i64 [[TMP11]] to i32
// SANITIZE-WITH-ATTR-NEXT: [[CONV54:%.*]] = shl i32 [[DOTTR]], 2
// SANITIZE-WITH-ATTR-NEXT: [[CONV55:%.*]] = and i32 [[CONV54]], 252
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -412,7 +412,7 @@ size_t test3_bdos_cast(struct annotated *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV1:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV1]], ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV1]], ptr [[ARRAYIDX3]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD7:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE9:%.*]] = shl i32 [[COUNTED_BY_LOAD7]], 2
// NO-SANITIZE-WITH-ATTR-NEXT: [[RESULT10:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE9]], 240
@@ -420,7 +420,7 @@ size_t test3_bdos_cast(struct annotated *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = and i32 [[RESULT10]], 252
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV12:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX15:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 4
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX15]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX15]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM17:%.*]] = sext i32 [[FAM_IDX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD20:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT21:%.*]] = sext i32 [[COUNTED_BY_LOAD20]] to i64
@@ -433,33 +433,33 @@ size_t test3_bdos_cast(struct annotated *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 252
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV26:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX30:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV26]], ptr [[ARRAYIDX30]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV26]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i8, ptr [[ARRAYIDX5]], i64 4
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX18]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX18]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX33:%.*]] = getelementptr i8, ptr [[ARRAYIDX5]], i64 8
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX33]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX33]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX19:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 8
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test4(struct annotated *p, int index, int fam_idx) {
@@ -471,17 +471,17 @@ void test4(struct annotated *p, int index, int fam_idx) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934589) i64 @test4_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT1:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT1:.*]], !prof [[PROF8]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
@@ -490,7 +490,7 @@ void test4(struct annotated *p, int index, int fam_idx) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -505,12 +505,12 @@ void test4(struct annotated *p, int index, int fam_idx) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test4_bdos(struct annotated *p, int index) {
@@ -519,7 +519,7 @@ size_t test4_bdos(struct annotated *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -12884901886, 12884901885) i64 @test4_bdos_cast1(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -535,7 +535,7 @@ size_t test4_bdos(struct annotated *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -12884901886, 12884901885) i64 @test4_bdos_cast1(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -551,12 +551,12 @@ size_t test4_bdos(struct annotated *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast1(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast1(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test4_bdos_cast1(struct annotated *p, int index) {
@@ -565,7 +565,7 @@ size_t test4_bdos_cast1(struct annotated *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418239, 10737418237) i64 @test4_bdos_cast2(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -580,7 +580,7 @@ size_t test4_bdos_cast1(struct annotated *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418239, 10737418237) i64 @test4_bdos_cast2(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -595,12 +595,12 @@ size_t test4_bdos_cast1(struct annotated *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast2(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast2(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test4_bdos_cast2(struct annotated *p, int index) {
@@ -609,46 +609,46 @@ size_t test4_bdos_cast2(struct annotated *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test5(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test5(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test5(struct anon_struct *p, int index) {
@@ -657,22 +657,22 @@ void test5(struct anon_struct *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test5_bdos(struct anon_struct *p) {
@@ -681,27 +681,27 @@ size_t test5_bdos(struct anon_struct *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont6:
+// SANITIZE-WITH-ATTR: [[CONT6]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0)
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4
@@ -710,25 +710,25 @@ size_t test5_bdos(struct anon_struct *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test6(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test6(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test6(struct anon_struct *p, int index) {
@@ -737,7 +737,7 @@ void test6(struct anon_struct *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test6_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2
@@ -746,7 +746,7 @@ void test6(struct anon_struct *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test6_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2
@@ -755,12 +755,12 @@ void test6(struct anon_struct *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test6_bdos(struct anon_struct *p) {
@@ -769,47 +769,47 @@ size_t test6_bdos(struct anon_struct *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test7(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont7:
+// SANITIZE-WITH-ATTR: [[CONT7]]:
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test7(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test7(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test7(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test7(struct union_of_fams *p, int index) {
@@ -818,22 +818,22 @@ void test7(struct union_of_fams *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test7_bdos(struct union_of_fams *p) {
@@ -842,49 +842,49 @@ size_t test7_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont14:
+// SANITIZE-WITH-ATTR: [[CONT14]]:
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]]
+// SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test8(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test8(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test8(struct union_of_fams *p, int index) {
@@ -893,7 +893,7 @@ void test8(struct union_of_fams *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64
@@ -901,7 +901,7 @@ void test8(struct union_of_fams *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64
@@ -909,12 +909,12 @@ void test8(struct union_of_fams *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test8_bdos(struct union_of_fams *p) {
@@ -923,47 +923,47 @@ size_t test8_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test9(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont7:
+// SANITIZE-WITH-ATTR: [[CONT7]]:
// SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]]
+// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test9(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test9(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test9(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test9(struct union_of_fams *p, int index) {
@@ -972,22 +972,22 @@ void test9(struct union_of_fams *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test9_bdos(struct union_of_fams *p) {
@@ -996,27 +996,27 @@ size_t test9_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont14:
+// SANITIZE-WITH-ATTR: [[CONT14]]:
// SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i32 [[NARROW]] to i8
-// SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]]
+// SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -1024,25 +1024,25 @@ size_t test9_bdos(struct union_of_fams *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i32 [[NARROW]] to i8
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test10(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test10(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test10(struct union_of_fams *p, int index) {
@@ -1051,7 +1051,7 @@ void test10(struct union_of_fams *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
@@ -1060,7 +1060,7 @@ void test10(struct union_of_fams *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
@@ -1069,12 +1069,12 @@ void test10(struct union_of_fams *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test10_bdos(struct union_of_fams *p) {
@@ -1083,29 +1083,29 @@ size_t test10_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test11(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont6:
+// SANITIZE-WITH-ATTR: [[CONT6]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -3
// SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[COUNTED_BY_LOAD]], 2
// SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 8
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[TMP2]], i32 [[RESULT]], i32 0
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test11(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[COUNTED_BY_LOAD]], 2
@@ -1115,25 +1115,25 @@ size_t test10_bdos(struct union_of_fams *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test11(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test11(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test11(struct annotated *p, int index) {
@@ -1142,7 +1142,7 @@ void test11(struct annotated *p, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934584, 8589934597) i64 @test11_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -1154,7 +1154,7 @@ void test11(struct annotated *p, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934584, 8589934597) i64 @test11_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -1166,12 +1166,12 @@ void test11(struct annotated *p, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test11_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test11_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test11_bdos(struct annotated *p) {
@@ -1195,87 +1195,87 @@ int test12_a, test12_b;
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
// SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont:
+// SANITIZE-WITH-ATTR: [[CONT]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[BAZ]], i64 [[TMP1]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds4:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF8]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS4]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.type_mismatch6:
+// SANITIZE-WITH-ATTR: [[HANDLER_TYPE_MISMATCH6]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR12:[0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BAZ]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT: br label [[FOR_COND:%.*]]
-// NO-SANITIZE-WITH-ATTR: for.cond:
-// NO-SANITIZE-WITH-ATTR-NEXT: br label [[FOR_COND]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: br label %[[FOR_COND:.*]]
+// NO-SANITIZE-WITH-ATTR: [[FOR_COND]]:
+// NO-SANITIZE-WITH-ATTR-NEXT: br label %[[FOR_COND]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
// SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR7:[0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR8:[0-9]+]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont:
+// SANITIZE-WITHOUT-ATTR: [[CONT]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[BAZ]], i64 [[TMP1]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4
// SANITIZE-WITHOUT-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds4:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS4]]:
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.type_mismatch6:
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_TYPE_MISMATCH6]]:
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR10:[0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BAZ]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label [[FOR_COND:%.*]]
-// NO-SANITIZE-WITHOUT-ATTR: for.cond:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label [[FOR_COND]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[INT_TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label %[[FOR_COND:.*]]
+// NO-SANITIZE-WITHOUT-ATTR: [[FOR_COND]]:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label %[[FOR_COND]]
//
int test12(int index) {
struct hang baz = test12_bar;
@@ -1298,56 +1298,56 @@ struct test13_bar {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test13(
// SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
-// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA11:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont5:
+// SANITIZE-WITH-ATTR: [[CONT5]]:
// SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[REVMAP]], i64 [[INDEX]]
-// SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA15:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA15:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 0
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test13(
// NO-SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA8:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA8:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[REVMAP]], i64 [[INDEX]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA12:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA12:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 0
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test13(
// SANITIZE-WITHOUT-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA11:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
// SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont5:
+// SANITIZE-WITHOUT-ATTR: [[CONT5]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[REVMAP]], i64 [[INDEX]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA15:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA15:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 0
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test13(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA8:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA8:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[REVMAP]], i64 [[INDEX]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA12:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA12:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 0
//
int test13(long index) {
@@ -1362,52 +1362,52 @@ struct test14_foo {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: ret i32 undef
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca [[STRUCT_TEST14_FOO:%.*]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 4
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BLAH]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test14(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont3:
+// SANITIZE-WITHOUT-ATTR: [[CONT3]]:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 undef
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test14(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca [[STRUCT_TEST14_FOO:%.*]], align 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 8
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BLAH]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
int test14(int idx) {
@@ -1416,42 +1416,42 @@ int test14(int idx) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: ret i32 undef
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @__const.test15.foo, i64 8), i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont1:
+// SANITIZE-WITHOUT-ATTR: [[CONT1]]:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 undef
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @__const.test15.foo, i64 8), i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
int test15(int idx) {
@@ -1465,30 +1465,30 @@ int test15(int idx) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test19(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 680
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 1
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test19(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test19(struct annotated *p) {
@@ -1498,22 +1498,22 @@ size_t test19(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test20(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test20(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test20(struct annotated *p) {
@@ -1523,22 +1523,22 @@ size_t test20(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test21(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test21(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test21(struct annotated *p) {
@@ -1548,22 +1548,22 @@ size_t test21(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test22(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test22(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test22(struct annotated *p) {
@@ -1573,22 +1573,22 @@ size_t test22(struct annotated *p) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test23(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test23(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test23(struct annotated *p) {
@@ -1603,38 +1603,38 @@ struct tests_foo {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 40
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT4:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont4:
+// SANITIZE-WITH-ATTR: [[CONT4]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
-// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
int test24(int c, struct tests_foo *var) {
@@ -1644,41 +1644,41 @@ int test24(int c, struct tests_foo *var) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
-// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA17:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA17:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont5:
+// SANITIZE-WITH-ATTR: [[CONT5]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA14:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test25(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA17:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA17:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP1]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test25(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA14:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP1]]
//
int test25(int c, struct tests_foo **var) {
@@ -1694,47 +1694,47 @@ struct test26_foo {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26(
// SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[S:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[S]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont5:
+// SANITIZE-WITH-ATTR: [[CONT5]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test26(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test26(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR6]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
int test26(int c, struct test26_foo *foo) {
@@ -1765,53 +1765,53 @@ struct test27_foo {
// SANITIZE-WITH-ATTR-LABEL: define dso_local ptr @test27(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ENTRIES]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA19:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[J]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP2]], i64 [[IDXPROM4]]
// SANITIZE-WITH-ATTR-NEXT: ret ptr [[ARRAYIDX5]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local ptr @test27(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA16:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA16:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[J]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM1]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret ptr [[ARRAYIDX2]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local ptr @test27(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA19:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM3:%.*]] = sext i32 [[J]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM3]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret ptr [[ARRAYIDX4]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local ptr @test27(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR6]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA16:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA16:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[J]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM1]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret ptr [[ARRAYIDX2]]
@@ -1828,59 +1828,59 @@ struct test28_foo {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
-// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA21:![0-9]+]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA21]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA21]]
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT17:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont17:
+// SANITIZE-WITH-ATTR: [[CONT17]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP5]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR8]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA18:![0-9]+]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA18]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP3]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test28(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA21:![0-9]+]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA21]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA21]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP3]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test28(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR7]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA18:![0-9]+]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA18]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP3]]
//
int test28(struct test28_foo *p, int i) {
@@ -1896,39 +1896,39 @@ struct annotated_struct_array {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 10
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ANN]], i64 [[TMP1]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA23:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM27:%.*]] = sext i32 [[IDX2]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM27]], [[TMP3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT32:%.*]], label [[HANDLER_OUT_OF_BOUNDS28:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds28:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT32:.*]], label %[[HANDLER_OUT_OF_BOUNDS28:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS28]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont32:
+// SANITIZE-WITH-ATTR: [[CONT32]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM27]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0)
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP5]], 2
-// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ANN]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA20:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA20:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -1936,37 +1936,37 @@ struct annotated_struct_array {
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP1]], 2
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM8:%.*]] = sext i32 [[IDX2]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM8]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX9]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX9]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test29(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 10
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT21:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont21:
+// SANITIZE-WITHOUT-ATTR: [[CONT21]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ANN]], i64 [[TMP1]]
-// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA23:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM18:%.*]] = sext i32 [[IDX2]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM18]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test29(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ANN]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA20:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA20:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM5:%.*]] = sext i32 [[IDX2]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM5]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test29(struct annotated_struct_array *ann, int idx1, int idx2) {
@@ -1986,34 +1986,34 @@ struct test30_struct {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[PCPU_REFCNT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PCPU_REFCNT]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[PCPU_REFCNT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PCPU_REFCNT]], i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test30(struct test30_struct *ptr, int idx) {
@@ -2030,22 +2030,22 @@ struct test31_struct {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1
//
int test31(struct test31_struct *ptr, int idx) {
@@ -2060,24 +2060,24 @@ struct annotated_with_array {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test32(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX2]], 43
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX1]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp ult i64 [[IDXPROM4]], [[TMP2]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS5:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds5:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label %[[CONT9:.*]], label %[[HANDLER_OUT_OF_BOUNDS5:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS5]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont9:
+// SANITIZE-WITH-ATTR: [[CONT9]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i64, ptr [[ARRAY]], i64 [[IDXPROM4]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -2087,12 +2087,12 @@ struct annotated_with_array {
// SANITIZE-WITH-ATTR-NEXT: [[REASS_SUB:%.*]] = sub nsw i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], [[FIELD_OFFSET]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REASS_SUB]], i64 -344)
// SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP5]], 344
-// SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP6]], ptr [[ARRAYIDX7]], align 8, !tbaa [[TBAA25:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP6]], ptr [[ARRAYIDX7]], align 8, !tbaa [[LONG_TBAA25:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test32(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX2]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -2107,32 +2107,32 @@ struct annotated_with_array {
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[IDX1]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM1]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP4]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA22:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP4]], ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA22:![0-9]+]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test32(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX2]], 43
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB17:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont7:
+// SANITIZE-WITHOUT-ATTR: [[CONT7]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX1]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM4]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX5]], align 8, !tbaa [[TBAA25:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX5]], align 8, !tbaa [[LONG_TBAA25:![0-9]+]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test32(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[IDX1]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM1]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA22:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA22:![0-9]+]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
@@ -2141,14 +2141,14 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 17179869521) i64 @test32_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 43
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -2162,7 +2162,7 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -34359738016, 34359738705) i64 @test32_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -2178,19 +2178,19 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 43
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont1:
+// SANITIZE-WITHOUT-ATTR: [[CONT1]]:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test32_bdos(struct annotated_with_array *ptr, int index) {
@@ -2199,7 +2199,7 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -21474836134, 21474836817) i64 @test32_bdos_cast(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -2216,7 +2216,7 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -21474836134, 21474836817) i64 @test32_bdos_cast(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
@@ -2233,12 +2233,12 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos_cast(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos_cast(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test32_bdos_cast(struct annotated_with_array *ptr, int index) {
@@ -2247,22 +2247,22 @@ size_t test32_bdos_cast(struct annotated_with_array *ptr, int index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test33(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test33(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test33(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test33(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test33(struct annotated *ptr) {
@@ -2278,50 +2278,50 @@ struct multi_subscripts {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test34(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 42
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont1:
+// SANITIZE-WITH-ATTR: [[CONT1]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds2:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS2]]:
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[IDX2]] to i64
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test34(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test34(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 42
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont1:
+// SANITIZE-WITHOUT-ATTR: [[CONT1]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43
-// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds2:
+// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS2]]:
// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[IDX2]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META9]]
// SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR: cont3:
+// SANITIZE-WITHOUT-ATTR: [[CONT3]]:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test34(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test34(struct multi_subscripts *ptr, int idx1, int idx2) {
@@ -2330,43 +2330,43 @@ size_t test34(struct multi_subscripts *ptr, int idx1, int idx2) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test35(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: handler.out_of_bounds:
+// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]:
// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR: cont3:
+// SANITIZE-WITH-ATTR: [[CONT3]]:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test35(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test35(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test35(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test35(struct annotated *p, size_t index) {
@@ -2375,22 +2375,22 @@ void test35(struct annotated *p, size_t index) {
// SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test35_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 0
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test35_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 0
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test35_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 0
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test35_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 0
//
size_t test35_bdos(struct annotated *p) {
@@ -2412,22 +2412,22 @@ struct {
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36(
// SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36(
// NO-SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR10:[0-9]+]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36(
// SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR9:[0-9]+]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test36() {
@@ -2436,7 +2436,7 @@ size_t test36() {
// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37(
// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITH-ATTR-NEXT: entry:
+// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -2447,7 +2447,7 @@ size_t test36() {
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37(
// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// NO-SANITIZE-WITH-ATTR-NEXT: entry:
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64
@@ -2458,14 +2458,113 @@ size_t test36() {
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
size_t test37(struct annotated *ptr) {
return __builtin_dynamic_object_size((1, 2, (4, 5, (7, 8, 9, (10, ptr->array)))), 1);
}
+//.
+// SANITIZE-WITH-ATTR: [[META2]] = !{}
+// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
+// SANITIZE-WITH-ATTR: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// SANITIZE-WITH-ATTR: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META7]] = !{!"Simple C/C++ TBAA"}
+// SANITIZE-WITH-ATTR: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575}
+// SANITIZE-WITH-ATTR: [[CHAR_TBAA9]] = !{[[META6]], [[META6]], i64 0}
+// SANITIZE-WITH-ATTR: [[TBAA_STRUCT10]] = !{i64 0, i64 24, [[CHAR_TBAA9]]}
+// SANITIZE-WITH-ATTR: [[_ZTS10TEST13_BARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META12]] = !{!"test13_foo", [[META13]], i64 0}
+// SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS10test13_bar", [[META14:![0-9]+]], i64 0}
+// SANITIZE-WITH-ATTR: [[META14]] = !{!"any pointer", [[META6]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0}
+// SANITIZE-WITH-ATTR: [[META16]] = !{!"p1 _ZTS10test13_foo", [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// SANITIZE-WITH-ATTR: [[META18]] = !{!"p1 _ZTS9tests_foo", [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS10TEST27_BARPTR_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// SANITIZE-WITH-ATTR: [[META20]] = !{!"p1 _ZTS10test27_bar", [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0}
+// SANITIZE-WITH-ATTR: [[META22]] = !{!"p1 _ZTS10test28_foo", [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0}
+// SANITIZE-WITH-ATTR: [[META24]] = !{!"p1 _ZTS9annotated", [[META14]], i64 0}
+// SANITIZE-WITH-ATTR: [[LONG_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
+// SANITIZE-WITH-ATTR: [[META26]] = !{!"long", [[META6]], i64 0}
+//.
+// NO-SANITIZE-WITH-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"}
+// NO-SANITIZE-WITH-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST13_BARPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"test13_foo", [[META10]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"p1 _ZTS10test13_bar", [[META11:![0-9]+]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS10test13_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META15]] = !{!"p1 _ZTS9tests_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST27_BARPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META17]] = !{!"p1 _ZTS10test27_bar", [[META11]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META19]] = !{!"p1 _ZTS10test28_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META21]] = !{!"p1 _ZTS9annotated", [[META11]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[LONG_TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0}
+// NO-SANITIZE-WITH-ATTR: [[META23]] = !{!"long", [[META4]], i64 0}
+//.
+// SANITIZE-WITHOUT-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"}
+// SANITIZE-WITHOUT-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]}
+// SANITIZE-WITHOUT-ATTR: [[PROF8]] = !{!"branch_weights", i32 1048575, i32 1}
+// SANITIZE-WITHOUT-ATTR: [[META9]] = !{}
+// SANITIZE-WITHOUT-ATTR: [[PROF10]] = !{!"branch_weights", i32 1, i32 1048575}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_BARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"test13_foo", [[META13]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS10test13_bar", [[META14:![0-9]+]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META14]] = !{!"any pointer", [[META4]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META16]] = !{!"p1 _ZTS10test13_foo", [[META14]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META18]] = !{!"p1 _ZTS9tests_foo", [[META14]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST27_BARPTR_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META20]] = !{!"p1 _ZTS10test27_bar", [[META14]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META22]] = !{!"p1 _ZTS10test28_foo", [[META14]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META24]] = !{!"p1 _ZTS9annotated", [[META14]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[LONG_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
+// SANITIZE-WITHOUT-ATTR: [[META26]] = !{!"long", [[META4]], i64 0}
+//.
+// NO-SANITIZE-WITHOUT-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"}
+// NO-SANITIZE-WITHOUT-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_BARPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"test13_foo", [[META10]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"p1 _ZTS10test13_bar", [[META11:![0-9]+]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS10test13_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META15]] = !{!"p1 _ZTS9tests_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST27_BARPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META17]] = !{!"p1 _ZTS10test27_bar", [[META11]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META19]] = !{!"p1 _ZTS10test28_foo", [[META11]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META21]] = !{!"p1 _ZTS9annotated", [[META11]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[LONG_TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0}
+// NO-SANITIZE-WITHOUT-ATTR: [[META23]] = !{!"long", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c
index 69cec72..2455f3b6 100644
--- a/clang/test/CodeGen/builtin-maxnum-minnum.c
+++ b/clang/test/CodeGen/builtin-maxnum-minnum.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK
typedef _Float16 half8 __attribute__((ext_vector_type(8)));
@@ -12,10 +12,10 @@ typedef long double ldouble2 __attribute__((ext_vector_type(2)));
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]]
//
@@ -27,10 +27,10 @@ half8 pfmin16(half8 a, half8 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
-// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]]
//
@@ -42,10 +42,10 @@ bf16x8 pfmin16b(bf16x8 a, bf16x8 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]]
//
@@ -57,10 +57,10 @@ float4 pfmin32(float4 a, float4 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]]
//
@@ -72,12 +72,12 @@ double2 pfmin64(double2 a, double2 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
-// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
//
@@ -90,10 +90,10 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]]
//
@@ -105,10 +105,10 @@ half8 pfmax16(half8 a, half8 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
-// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]]
//
@@ -120,10 +120,10 @@ bf16x8 pfmax16b(bf16x8 a, bf16x8 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]]
//
@@ -135,10 +135,10 @@ float4 pfmax32(float4 a, float4 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]]
//
@@ -151,12 +151,12 @@ double2 pfmax64(double2 a, double2 b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
-// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
//
@@ -165,7 +165,7 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) {
}
//.
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"Simple C++ TBAA"}
//.
diff --git a/clang/test/CodeGen/cleanup-destslot-simple.c b/clang/test/CodeGen/cleanup-destslot-simple.c
index 8ace332..23a70d4 100644
--- a/clang/test/CodeGen/cleanup-destslot-simple.c
+++ b/clang/test/CodeGen/cleanup-destslot-simple.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s --check-prefix=CHECK-LIFETIME
// We shouldn't have markers at -O0 or with msan.
@@ -9,22 +9,24 @@
// There is no exception to handle here, lifetime.end is not a destructor,
// so there is no need have cleanup dest slot related code
-// CHECK-LIFETIME-LABEL: @test(
-// CHECK-LIFETIME-NEXT: entry:
+// CHECK-LIFETIME-LABEL: define dso_local i32 @test(
+// CHECK-LIFETIME-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
+// CHECK-LIFETIME-NEXT: [[ENTRY:.*:]]
// CHECK-LIFETIME-NEXT: [[X:%.*]] = alloca i32, align 4
// CHECK-LIFETIME-NEXT: [[P:%.*]] = alloca ptr, align 8
// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2:[0-9]+]], !dbg [[DBG9:![0-9]+]]
-// CHECK-LIFETIME-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10:![0-9]+]], !tbaa [[TBAA11:![0-9]+]]
+// CHECK-LIFETIME-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10:![0-9]+]], !tbaa [[INT_TBAA11:![0-9]+]]
// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]]
-// CHECK-LIFETIME-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-LIFETIME-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
-// CHECK-LIFETIME-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20:![0-9]+]], !tbaa [[TBAA11]]
-// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG21:![0-9]+]]
-// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG21]]
-// CHECK-LIFETIME-NEXT: ret i32 [[TMP0]], !dbg [[DBG22:![0-9]+]]
+// CHECK-LIFETIME-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16:![0-9]+]], !tbaa [[INTPTR_TBAA17:![0-9]+]]
+// CHECK-LIFETIME-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG20:![0-9]+]], !tbaa [[INTPTR_TBAA17]]
+// CHECK-LIFETIME-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG21:![0-9]+]], !tbaa [[INT_TBAA11]]
+// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]]
+// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]]
+// CHECK-LIFETIME-NEXT: ret i32 [[TMP0]], !dbg [[DBG23:![0-9]+]]
//
-// CHECK-OPTNONE-LABEL: @test(
-// CHECK-OPTNONE-NEXT: entry:
+// CHECK-OPTNONE-LABEL: define dso_local i32 @test(
+// CHECK-OPTNONE-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
+// CHECK-OPTNONE-NEXT: [[ENTRY:.*:]]
// CHECK-OPTNONE-NEXT: [[X:%.*]] = alloca i32, align 4
// CHECK-OPTNONE-NEXT: [[P:%.*]] = alloca ptr, align 8
// CHECK-OPTNONE-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG9:![0-9]+]]
@@ -33,87 +35,89 @@
// CHECK-OPTNONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG12:![0-9]+]]
// CHECK-OPTNONE-NEXT: ret i32 [[TMP1]], !dbg [[DBG13:![0-9]+]]
//
-// CHECK-MSAN-LABEL: @test(
-// CHECK-MSAN-NEXT: entry:
+// CHECK-MSAN-LABEL: define dso_local noundef i32 @test(
+// CHECK-MSAN-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// CHECK-MSAN-NEXT: [[ENTRY:.*:]]
// CHECK-MSAN-NEXT: [[X:%.*]] = alloca i32, align 4
// CHECK-MSAN-NEXT: [[P:%.*]] = alloca ptr, align 8
-// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2:[0-9]+]], !dbg [[DBG9:![0-9]+]]
-// CHECK-MSAN-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG9]]
-// CHECK-MSAN-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG9]]
-// CHECK-MSAN-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr, !dbg [[DBG9]]
-// CHECK-MSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG10:![0-9]+]]
-// CHECK-MSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10]], !tbaa [[TBAA11:![0-9]+]]
-// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]]
-// CHECK-MSAN-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64, !dbg [[DBG15]]
-// CHECK-MSAN-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080, !dbg [[DBG15]]
-// CHECK-MSAN-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr, !dbg [[DBG15]]
-// CHECK-MSAN-NEXT: store i64 0, ptr [[TMP5]], align 8, !dbg [[DBG16:![0-9]+]]
-// CHECK-MSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-MSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
-// CHECK-MSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP5]], align 8, !dbg [[DBG19]]
-// CHECK-MSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG20:![0-9]+]]
-// CHECK-MSAN-NEXT: br i1 [[_MSCMP_NOT]], label [[TMP7:%.*]], label [[TMP6:%.*]], !dbg [[DBG20]], !prof [[PROF21:![0-9]+]]
-// CHECK-MSAN: 6:
-// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]], !dbg [[DBG20]]
-// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG20]]
-// CHECK-MSAN: 7:
-// CHECK-MSAN-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
-// CHECK-MSAN-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_P_0_]] to i64, !dbg [[DBG20]]
-// CHECK-MSAN-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080, !dbg [[DBG20]]
-// CHECK-MSAN-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr, !dbg [[DBG20]]
-// CHECK-MSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG20]]
-// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]]
-// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]]
-// CHECK-MSAN-NEXT: [[_MSCMP2_NOT:%.*]] = icmp eq i32 [[_MSLD1]], 0, !dbg [[DBG23:![0-9]+]]
-// CHECK-MSAN-NEXT: br i1 [[_MSCMP2_NOT]], label [[TMP13:%.*]], label [[TMP12:%.*]], !dbg [[DBG23]], !prof [[PROF21]]
-// CHECK-MSAN: 12:
-// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]], !dbg [[DBG23]]
-// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG23]]
-// CHECK-MSAN: 13:
-// CHECK-MSAN-NEXT: ret i32 [[TMP8]], !dbg [[DBG23]]
+// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR3:[0-9]+]], !dbg [[DBG10:![0-9]+]]
+// CHECK-MSAN-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]]
+// CHECK-MSAN-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG10]]
+// CHECK-MSAN-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr, !dbg [[DBG10]]
+// CHECK-MSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG11:![0-9]+]]
+// CHECK-MSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG11]], !tbaa [[INT_TBAA12:![0-9]+]]
+// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG16:![0-9]+]]
+// CHECK-MSAN-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64, !dbg [[DBG16]]
+// CHECK-MSAN-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080, !dbg [[DBG16]]
+// CHECK-MSAN-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr, !dbg [[DBG16]]
+// CHECK-MSAN-NEXT: store i64 0, ptr [[TMP5]], align 8, !dbg [[DBG17:![0-9]+]]
+// CHECK-MSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG17]], !tbaa [[INTPTR_TBAA18:![0-9]+]]
+// CHECK-MSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG21:![0-9]+]], !tbaa [[INTPTR_TBAA18]]
+// CHECK-MSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP5]], align 8, !dbg [[DBG21]]
+// CHECK-MSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG22:![0-9]+]]
+// CHECK-MSAN-NEXT: br i1 [[_MSCMP_NOT]], label %[[BB7:.*]], label %[[BB6:.*]], !dbg [[DBG22]], !prof [[PROF23:![0-9]+]]
+// CHECK-MSAN: [[BB6]]:
+// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]], !dbg [[DBG22]]
+// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG22]]
+// CHECK-MSAN: [[BB7]]:
+// CHECK-MSAN-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG22]], !tbaa [[INT_TBAA12]]
+// CHECK-MSAN-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_P_0_]] to i64, !dbg [[DBG22]]
+// CHECK-MSAN-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080, !dbg [[DBG22]]
+// CHECK-MSAN-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr, !dbg [[DBG22]]
+// CHECK-MSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG22]]
+// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG24:![0-9]+]]
+// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR3]], !dbg [[DBG24]]
+// CHECK-MSAN-NEXT: [[_MSCMP2_NOT:%.*]] = icmp eq i32 [[_MSLD1]], 0, !dbg [[DBG25:![0-9]+]]
+// CHECK-MSAN-NEXT: br i1 [[_MSCMP2_NOT]], label %[[BB13:.*]], label %[[BB12:.*]], !dbg [[DBG25]], !prof [[PROF23]]
+// CHECK-MSAN: [[BB12]]:
+// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]], !dbg [[DBG25]]
+// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG25]]
+// CHECK-MSAN: [[BB13]]:
+// CHECK-MSAN-NEXT: ret i32 [[TMP8]], !dbg [[DBG25]]
//
-// CHECK-KMSAN-LABEL: @test(
-// CHECK-KMSAN-NEXT: entry:
+// CHECK-KMSAN-LABEL: define dso_local i32 @test(
+// CHECK-KMSAN-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// CHECK-KMSAN-NEXT: [[ENTRY:.*:]]
// CHECK-KMSAN-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() #[[ATTR2:[0-9]+]]
// CHECK-KMSAN-NEXT: [[X:%.*]] = alloca i32, align 4
// CHECK-KMSAN-NEXT: [[P:%.*]] = alloca ptr, align 8
-// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG9:![0-9]+]]
-// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[X]], i64 4, ptr nonnull @[[GLOB0:[0-9]+]]) #[[ATTR2]], !dbg [[DBG9]]
-// CHECK-KMSAN-NEXT: [[TMP1:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG10:![0-9]+]]
-// CHECK-KMSAN-NEXT: [[TMP2:%.*]] = extractvalue { ptr, ptr } [[TMP1]], 0, !dbg [[DBG10]]
-// CHECK-KMSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG10]]
-// CHECK-KMSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10]], !tbaa [[TBAA11:![0-9]+]]
-// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]]
-// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[P]], i64 8, ptr nonnull @[[GLOB1:[0-9]+]]) #[[ATTR2]], !dbg [[DBG15]]
-// CHECK-KMSAN-NEXT: [[TMP3:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG16:![0-9]+]]
-// CHECK-KMSAN-NEXT: [[TMP4:%.*]] = extractvalue { ptr, ptr } [[TMP3]], 0, !dbg [[DBG16]]
-// CHECK-KMSAN-NEXT: store i64 0, ptr [[TMP4]], align 8, !dbg [[DBG16]]
-// CHECK-KMSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-KMSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
-// CHECK-KMSAN-NEXT: [[TMP5:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG19]]
-// CHECK-KMSAN-NEXT: [[TMP6:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 0, !dbg [[DBG19]]
-// CHECK-KMSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8, !dbg [[DBG19]]
-// CHECK-KMSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG20:![0-9]+]]
-// CHECK-KMSAN-NEXT: br i1 [[_MSCMP_NOT]], label [[TMP10:%.*]], label [[TMP7:%.*]], !dbg [[DBG20]], !prof [[PROF21:![0-9]+]]
-// CHECK-KMSAN: 7:
-// CHECK-KMSAN-NEXT: [[TMP8:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 1, !dbg [[DBG19]]
-// CHECK-KMSAN-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8, !dbg [[DBG19]]
-// CHECK-KMSAN-NEXT: call void @__msan_warning(i32 [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: br label [[TMP10]], !dbg [[DBG20]]
-// CHECK-KMSAN: 10:
+// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG10:![0-9]+]]
+// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[X]], i64 4, ptr nonnull @[[GLOB0:[0-9]+]]) #[[ATTR2]], !dbg [[DBG10]]
+// CHECK-KMSAN-NEXT: [[TMP1:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG11:![0-9]+]]
+// CHECK-KMSAN-NEXT: [[TMP2:%.*]] = extractvalue { ptr, ptr } [[TMP1]], 0, !dbg [[DBG11]]
+// CHECK-KMSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG11]]
+// CHECK-KMSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG11]], !tbaa [[INT_TBAA12:![0-9]+]]
+// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG16:![0-9]+]]
+// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[P]], i64 8, ptr nonnull @[[GLOB1:[0-9]+]]) #[[ATTR2]], !dbg [[DBG16]]
+// CHECK-KMSAN-NEXT: [[TMP3:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG17:![0-9]+]]
+// CHECK-KMSAN-NEXT: [[TMP4:%.*]] = extractvalue { ptr, ptr } [[TMP3]], 0, !dbg [[DBG17]]
+// CHECK-KMSAN-NEXT: store i64 0, ptr [[TMP4]], align 8, !dbg [[DBG17]]
+// CHECK-KMSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG17]], !tbaa [[INTPTR_TBAA18:![0-9]+]]
+// CHECK-KMSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG21:![0-9]+]], !tbaa [[INTPTR_TBAA18]]
+// CHECK-KMSAN-NEXT: [[TMP5:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG21]]
+// CHECK-KMSAN-NEXT: [[TMP6:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 0, !dbg [[DBG21]]
+// CHECK-KMSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8, !dbg [[DBG21]]
+// CHECK-KMSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG22:![0-9]+]]
+// CHECK-KMSAN-NEXT: br i1 [[_MSCMP_NOT]], label %[[BB10:.*]], label %[[BB7:.*]], !dbg [[DBG22]], !prof [[PROF23:![0-9]+]]
+// CHECK-KMSAN: [[BB7]]:
+// CHECK-KMSAN-NEXT: [[TMP8:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 1, !dbg [[DBG21]]
+// CHECK-KMSAN-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8, !dbg [[DBG21]]
+// CHECK-KMSAN-NEXT: call void @__msan_warning(i32 [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: br label %[[BB10]], !dbg [[DBG22]]
+// CHECK-KMSAN: [[BB10]]:
// CHECK-KMSAN-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4008
// CHECK-KMSAN-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr i8, ptr [[TMP0]], i64 800
-// CHECK-KMSAN-NEXT: [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
-// CHECK-KMSAN-NEXT: [[TMP12:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: [[TMP13:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 0, !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 1, !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG20]]
-// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]]
-// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]]
-// CHECK-KMSAN-NEXT: store i32 [[_MSLD1]], ptr [[RETVAL_SHADOW]], align 8, !dbg [[DBG23:![0-9]+]]
-// CHECK-KMSAN-NEXT: store i32 [[TMP15]], ptr [[RETVAL_ORIGIN]], align 4, !dbg [[DBG23]]
-// CHECK-KMSAN-NEXT: ret i32 [[TMP11]], !dbg [[DBG23]]
+// CHECK-KMSAN-NEXT: [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG22]], !tbaa [[INT_TBAA12]]
+// CHECK-KMSAN-NEXT: [[TMP12:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: [[TMP13:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 0, !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 1, !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG22]]
+// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG24:![0-9]+]]
+// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG24]]
+// CHECK-KMSAN-NEXT: store i32 [[_MSLD1]], ptr [[RETVAL_SHADOW]], align 8, !dbg [[DBG25:![0-9]+]]
+// CHECK-KMSAN-NEXT: store i32 [[TMP15]], ptr [[RETVAL_ORIGIN]], align 4, !dbg [[DBG25]]
+// CHECK-KMSAN-NEXT: ret i32 [[TMP11]], !dbg [[DBG25]]
//
int test(void) {
int x = 3;
diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c
index ee3a22b..8a631c4 100644
--- a/clang/test/CodeGen/isfpclass.c
+++ b/clang/test/CodeGen/isfpclass.c
@@ -1,9 +1,9 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple aarch64-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// CHECK-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000
// CHECK-NEXT: ret i1 [[TMP1]]
@@ -12,9 +12,9 @@ _Bool check_isfpclass_finite(float x) {
return __builtin_isfpclass(x, 504 /*Finite*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite_strict
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite_strict(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -23,9 +23,9 @@ _Bool check_isfpclass_finite_strict(float x) {
return __builtin_isfpclass(x, 504 /*Finite*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -33,9 +33,9 @@ _Bool check_isfpclass_nan_f32(float x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32_strict
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32_strict(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -44,9 +44,9 @@ _Bool check_isfpclass_nan_f32_strict(float x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64
-// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64(
+// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1)
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -54,9 +54,9 @@ _Bool check_isfpclass_snan_f64(double x) {
return __builtin_isfpclass(x, 1 /*SNaN*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64_strict
-// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64_strict(
+// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -65,9 +65,9 @@ _Bool check_isfpclass_snan_f64_strict(double x) {
return __builtin_isfpclass(x, 1 /*NaN*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16
-// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16(
+// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq half [[X]], 0xH0000
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -75,9 +75,9 @@ _Bool check_isfpclass_zero_f16(_Float16 x) {
return __builtin_isfpclass(x, 96 /*Zero*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16_strict
-// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16_strict(
+// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f16(half [[X]], i32 96) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -86,9 +86,9 @@ _Bool check_isfpclass_zero_f16_strict(_Float16 x) {
return __builtin_isfpclass(x, 96 /*Zero*/);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isnan
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isnan(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -97,9 +97,9 @@ _Bool check_isnan(float x) {
return __builtin_isnan(x);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isinf
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isinf(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 516) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -108,9 +108,9 @@ _Bool check_isinf(float x) {
return __builtin_isinf(x);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isfinite
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isfinite(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -119,9 +119,9 @@ _Bool check_isfinite(float x) {
return __builtin_isfinite(x);
}
-// CHECK-LABEL: define dso_local noundef i1 @check_isnormal
-// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef i1 @check_isnormal(
+// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 264) #[[ATTR5]]
// CHECK-NEXT: ret i1 [[TMP0]]
//
@@ -136,9 +136,9 @@ typedef double __attribute__((ext_vector_type(4))) double4;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef long __attribute__((ext_vector_type(4))) long4;
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32
-// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32(
+// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -147,9 +147,9 @@ int4 check_isfpclass_nan_v4f32(float4 x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32
-// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32(
+// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]]
// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -159,15 +159,20 @@ int4 check_isfpclass_nan_strict_v4f32(float4 x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64
-// CHECK-SAME: (ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 16 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 16 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x double> [[X]], zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64>
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
long4 check_isfpclass_nan_v4f64(double4 x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
index 0124cc5..20a3100 100644
--- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
+++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 6
// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s -check-prefixes=CHECK-WIN64
// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-I686
@@ -18,49 +18,49 @@ long double powl(long double a, long double b);
// CHECK-LABEL: define dso_local x86_fp80 @test_powl(
// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]]
//
// CHECK-WIN64-LABEL: define dso_local x86_fp80 @test_powl(
// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]]
//
// CHECK-I686-LABEL: define dso_local x86_fp80 @test_powl(
// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]]
//
// CHECK-PPC-LABEL: define dso_local ppc_fp128 @test_powl(
// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]], ppc_fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]]
//
// CHECK-ARM-LABEL: define dso_local double @test_powl(
// CHECK-ARM-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]]
//
// CHECK-ARM-HF-LABEL: define dso_local double @test_powl(
// CHECK-ARM-HF-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]]
//
// CHECK-THUMB-LABEL: define double @test_powl(
// CHECK-THUMB-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]]
//
// CHECK-AARCH-LABEL: define dso_local fp128 @test_powl(
// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]], fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]]
//
// CHECK-SPIR-LABEL: define dso_local spir_func double @test_powl(
// CHECK-SPIR-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]]
//
// CHECK-MINGW32-LABEL: define dso_local void @test_powl(
// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias writable writeonly sret(x86_fp80) align 16 captures(none) initializes((0, 10)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA6]]
-// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA6]]
-// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[TBAA6]]
+// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA6:![0-9]+]]
+// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
+// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
+// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
// CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]]
-// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA6]]
-// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA6]]
+// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
+// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
//
long double test_powl(long double a, long double b) {
return powl(a, b);
@@ -104,21 +104,21 @@ long double test_powl(long double a, long double b) {
//
// CHECK-ARM-LABEL: define dso_local void @test_cargl(
// CHECK-ARM-SAME: ptr dead_on_unwind noalias writable writeonly sret({ double, double }) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
// CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8
// CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8
//
// CHECK-ARM-HF-LABEL: define dso_local { double, double } @test_cargl(
// CHECK-ARM-HF-SAME: { double, double } noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
//
// CHECK-THUMB-LABEL: define { double, double } @test_cargl(
// CHECK-THUMB-SAME: [2 x double] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
//
// CHECK-AARCH-LABEL: define dso_local { fp128, fp128 } @test_cargl(
// CHECK-AARCH-SAME: [2 x fp128] noundef alignstack(16) [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA2]]
+// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA2]]
//
// CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl(
// CHECK-SPIR-SAME: ptr dead_on_unwind noalias writable writeonly sret({ double, double }) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval({ double, double }) align 8 captures(none) [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
@@ -137,7 +137,7 @@ long double test_powl(long double a, long double b) {
// CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16
// CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16
// CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]]
-// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA6]]
+// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
// CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16
// CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16
// CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16
@@ -154,96 +154,96 @@ int ilogbl(long double a);
// CHECK-LABEL: define dso_local i32 @test_ilogb(
// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]]
+// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA2]]
//
// CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb(
// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]]
+// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA2]]
//
// CHECK-I686-LABEL: define dso_local i32 @test_ilogb(
// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]]
+// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA3]]
//
// CHECK-PPC-LABEL: define dso_local i32 @test_ilogb(
// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR4]], !tbaa [[TBAA2]]
+// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR4]], !tbaa [[INT_TBAA2]]
//
// CHECK-ARM-LABEL: define dso_local i32 @test_ilogb(
// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
//
// CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb(
// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
//
// CHECK-THUMB-LABEL: define i32 @test_ilogb(
// CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA3]]
+// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]]
//
// CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb(
// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]]
+// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA2]]
//
// CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb(
// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR4]], !tbaa [[TBAA2]]
+// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR4]], !tbaa [[INT_TBAA2]]
//
// CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb(
// CHECK-MINGW32-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA6]]
-// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA6]]
+// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
+// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]]
// CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]]
//
int test_ilogb(long double a) {
return ilogbl(a);
}
//.
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-WIN64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-WIN64: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK-WIN64: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
// CHECK-WIN64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-WIN64: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-I686: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK-I686: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK-I686: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
// CHECK-I686: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// CHECK-I686: [[META6]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-PPC: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-PPC: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK-PPC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
// CHECK-PPC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-PPC: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-ARM: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK-ARM: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK-ARM: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
// CHECK-ARM: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// CHECK-ARM: [[META6]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-ARM-HF: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK-ARM-HF: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK-ARM-HF: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
// CHECK-ARM-HF: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// CHECK-ARM-HF: [[META6]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-THUMB: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK-THUMB: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// CHECK-THUMB: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
// CHECK-THUMB: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// CHECK-THUMB: [[META6]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-AARCH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-AARCH: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK-AARCH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
// CHECK-AARCH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-AARCH: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-SPIR: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK-SPIR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK-SPIR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
// CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"}
//.
-// CHECK-MINGW32: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-MINGW32: [[LONG_DOUBLE_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
// CHECK-MINGW32: [[META7]] = !{!"long double", [[META8:![0-9]+]], i64 0}
// CHECK-MINGW32: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0}
// CHECK-MINGW32: [[META9]] = !{!"Simple C/C++ TBAA"}
diff --git a/clang/test/CodeGen/math-libcalls-tbaa.c b/clang/test/CodeGen/math-libcalls-tbaa.c
index b2f502e..53ca796 100644
--- a/clang/test/CodeGen/math-libcalls-tbaa.c
+++ b/clang/test/CodeGen/math-libcalls-tbaa.c
@@ -1,7 +1,7 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,NONEWSTRUCTPATHTBAA
-// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,NEWSTRUCTPATHTBAA
+// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes=NONEWSTRUCTPATHTBAA
+// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s -check-prefixes=NEWSTRUCTPATHTBAA
float expf(float);
double remainder(double, double);
@@ -13,14 +13,23 @@ float crealf(float _Complex);
// Emit int TBAA metadata on FP math libcalls, which is useful for alias analysis
-// CHECK-LABEL: define dso_local float @test_expf(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
-// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
-// CHECK-NEXT: ret float [[MUL]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_expf(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2:![0-9]+]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[INT_TBAA6:![0-9]+]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_expf(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
+// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]]
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
+// NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]]
//
float test_expf (float num[]) {
const float expm2 = expf(num[10]); // Emit TBAA metadata on @expf
@@ -28,14 +37,23 @@ float test_expf (float num[]) {
return tmp;
}
-// CHECK-LABEL: define dso_local float @test_builtin_expf(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
-// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]]
-// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
-// CHECK-NEXT: ret float [[MUL]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_builtin_expf(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[INT_TBAA6]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_builtin_expf(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]]
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
+// NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]]
//
float test_builtin_expf (float num[]) {
const float expm2 = __builtin_expf(num[10]); // Emit TBAA metadata on @expf
@@ -45,14 +63,23 @@ float test_builtin_expf (float num[]) {
//
// Negative test: fabs cannot set errno
-// CHECK-LABEL: define dso_local double @test_fabs(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
-// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]])
-// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]]
-// CHECK-NEXT: ret double [[MUL]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_fabs(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]])
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_fabs(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8:![0-9]+]]
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]])
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]]
+// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
//
double test_fabs (double num[]) {
const double expm2 = fabs(num[10]); // Don't emit TBAA metadata
@@ -60,14 +87,23 @@ double test_fabs (double num[]) {
return tmp;
}
-// CHECK-LABEL: define dso_local double @test_remainder(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
-// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]]
-// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
-// CHECK-NEXT: ret double [[MUL]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_remainder(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[INT_TBAA6]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_remainder(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
+// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]]
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
+// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
//
double test_remainder (double num[], double a) {
const double expm2 = remainder(num[10], a); // Emit TBAA metadata
@@ -78,17 +114,29 @@ double test_remainder (double num[], double a) {
//
// TODO: frexp is not subject to any errors, but also writes to
// its int pointer out argument, so it could emit int TBAA metadata.
-// CHECK-LABEL: define dso_local double @test_frexp(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4
-// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16
-// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]]
-// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
-// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]]
-// CHECK-NEXT: ret double [[MUL]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_frexp(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[E:%.*]] = alloca i32, align 4
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_frexp(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[E:%.*]] = alloca i32, align 4
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
+// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]]
//
double test_frexp (double num[]) {
int e;
@@ -100,24 +148,43 @@ double test_frexp (double num[]) {
//
// Negative test: sincos is a library function, but is not a builtin function
// checked in CodeGenFunction::EmitCallExpr.
-// CHECK-LABEL: define dso_local float @test_sincos(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[SIN:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[COS:%.*]] = alloca float, align 4
-// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
-// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
-// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]]
-// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]]
-// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]]
-// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]]
-// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
-// CHECK-NEXT: ret float [[ADD]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_sincos(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[SIN:%.*]] = alloca float, align 4
+// NONEWSTRUCTPATHTBAA-NEXT: [[COS:%.*]] = alloca float, align 4
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]]
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_sincos(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[SIN:%.*]] = alloca float, align 4
+// NEWSTRUCTPATHTBAA-NEXT: [[COS:%.*]] = alloca float, align 4
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]]
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]]
//
float test_sincos (float num[]) {
float sin, cos;
@@ -127,18 +194,31 @@ float test_sincos (float num[]) {
}
// TODO: The builtin return a complex type
-// CHECK-LABEL: define dso_local float @test_cacoshf(
-// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
-// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0
-// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1
-// CHECK-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]]
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]]
-// CHECK-NEXT: ret float [[ADD]]
+// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_cacoshf(
+// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0
+// NONEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1
+// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0
+// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]]
+// NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]]
+// NONEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]]
+//
+// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_cacoshf(
+// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0
+// NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1
+// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]]
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0
+// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]]
+// NEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]]
//
float test_cacoshf (float num[]) {
float _Complex z = cacoshf(num[2]); // Don't emit TBAA metadata
@@ -147,13 +227,13 @@ float test_cacoshf (float num[]) {
}
//.
-// NONEWSTRUCTPATHTBAA: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// NONEWSTRUCTPATHTBAA: [[FLOAT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// NONEWSTRUCTPATHTBAA: [[META3]] = !{!"float", [[META4:![0-9]+]], i64 0}
// NONEWSTRUCTPATHTBAA: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// NONEWSTRUCTPATHTBAA: [[META5]] = !{!"Simple C/C++ TBAA"}
-// NONEWSTRUCTPATHTBAA: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// NONEWSTRUCTPATHTBAA: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
// NONEWSTRUCTPATHTBAA: [[META7]] = !{!"int", [[META4]], i64 0}
-// NONEWSTRUCTPATHTBAA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+// NONEWSTRUCTPATHTBAA: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
// NONEWSTRUCTPATHTBAA: [[META9]] = !{!"double", [[META4]], i64 0}
//.
// NEWSTRUCTPATHTBAA: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 4}
@@ -165,6 +245,3 @@ float test_cacoshf (float num[]) {
// NEWSTRUCTPATHTBAA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0, i64 8}
// NEWSTRUCTPATHTBAA: [[META9]] = !{[[META4]], i64 8, !"double"}
//.
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// NEWSTRUCTPATHTBAA: {{.*}}
-// NONEWSTRUCTPATHTBAA: {{.*}}
diff --git a/clang/test/CodeGen/sanitize-metadata-nosanitize.c b/clang/test/CodeGen/sanitize-metadata-nosanitize.c
index eabcbd1..22ed25b 100644
--- a/clang/test/CodeGen/sanitize-metadata-nosanitize.c
+++ b/clang/test/CodeGen/sanitize-metadata-nosanitize.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
// RUN: %clang_cc1 -O -fexperimental-sanitize-metadata=covered -fexperimental-sanitize-metadata=atomics -fexperimental-sanitize-metadata=uar -triple x86_64-gnu-linux -x c -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK
//.
@@ -11,9 +11,9 @@
// CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_dtor, ptr @__sanitizer_metadata_covered2.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_dtor, ptr @__sanitizer_metadata_atomics2.module_dtor }]
//.
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none)
-// CHECK-LABEL: define dso_local void @escape
-// CHECK-SAME: (ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META2:![0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @escape(
+// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META2:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret void
//
__attribute__((noinline, not_tail_called)) void escape(const volatile void *p) {
@@ -22,14 +22,14 @@ __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) {
}
// CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none)
-// CHECK-LABEL: define dso_local i32 @normal_function
-// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META4:![0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local i32 @normal_function(
+// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META4:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]]
// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11:![0-9]+]]
// CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]])
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
int normal_function(int *x, int *y) {
@@ -39,14 +39,14 @@ int normal_function(int *x, int *y) {
}
// CHECK: Function Attrs: disable_sanitizer_instrumentation mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none)
-// CHECK-LABEL: define dso_local i32 @test_disable_sanitize_instrumentation
-// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local i32 @test_disable_sanitize_instrumentation(
+// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]]
+// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]]
// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4
// CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]])
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
__attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_instrumentation(int *x, int *y) {
@@ -56,14 +56,14 @@ __attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_ins
}
// CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none)
-// CHECK-LABEL: define dso_local i32 @test_no_sanitize_thread
-// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !pcsections [[META14:![0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local i32 @test_no_sanitize_thread(
+// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !pcsections [[META14:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]]
+// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]]
// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]]
// CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]])
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
__attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int *y) {
@@ -73,14 +73,14 @@ __attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int *
}
// CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none)
-// CHECK-LABEL: define dso_local i32 @test_no_sanitize_all
-// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] !pcsections [[META14]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local i32 @test_no_sanitize_all(
+// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] !pcsections [[META14]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]]
+// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]]
// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]]
// CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]])
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
__attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) {
@@ -101,13 +101,13 @@ __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) {
// CHECK: [[META3]] = !{i64 0}
// CHECK: [[META4]] = !{!"sanmd_covered2!C", [[META5:![0-9]+]]}
// CHECK: [[META5]] = !{i64 3}
-// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
// CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0}
// CHECK: [[META8]] = !{!"any pointer", [[META9:![0-9]+]], i64 0}
// CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
// CHECK: [[META10]] = !{!"Simple C/C++ TBAA"}
// CHECK: [[META11]] = !{!"sanmd_atomics2!C"}
-// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// CHECK: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
// CHECK: [[META13]] = !{!"int", [[META9]], i64 0}
// CHECK: [[META14]] = !{!"sanmd_covered2!C", [[META15:![0-9]+]]}
// CHECK: [[META15]] = !{i64 2}
diff --git a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp
index a77593f..8969e12 100644
--- a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp
+++ b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-- | FileCheck %s
extern volatile bool b;
@@ -6,22 +6,23 @@ extern volatile int i;
extern bool A();
extern bool B();
-// CHECK-LABEL: @_Z1fv(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1fv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2:![0-9]+]], !range [[RNG6:![0-9]+]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 true)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2:![0-9]+]], !range [[RNG6:![0-9]+]], !noundef [[META7:![0-9]+]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 true)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av()
// CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN:%.*]]
-// CHECK: if.end:
+// CHECK-NEXT: br label %[[RETURN:.*]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN]]
-// CHECK: return:
+// CHECK-NEXT: br label %[[RETURN]]
+// CHECK: [[RETURN]]:
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1
// CHECK-NEXT: ret i1 [[TMP1]]
//
@@ -33,22 +34,23 @@ bool f() {
return B();
}
-// CHECK-LABEL: @_Z1gv(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1gv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av()
// CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN:%.*]]
-// CHECK: if.end:
+// CHECK-NEXT: br label %[[RETURN:.*]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN]]
-// CHECK: return:
+// CHECK-NEXT: br label %[[RETURN]]
+// CHECK: [[RETURN]]:
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1
// CHECK-NEXT: ret i1 [[TMP1]]
//
@@ -61,22 +63,23 @@ bool g() {
return B();
}
-// CHECK-LABEL: @_Z1hv(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1hv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av()
// CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN:%.*]]
-// CHECK: if.end:
+// CHECK-NEXT: br label %[[RETURN:.*]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1
-// CHECK-NEXT: br label [[RETURN]]
-// CHECK: return:
+// CHECK-NEXT: br label %[[RETURN]]
+// CHECK: [[RETURN]]:
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1
// CHECK-NEXT: ret i1 [[TMP1]]
//
@@ -87,18 +90,19 @@ bool h() {
return B();
}
-// CHECK-LABEL: @_Z8NullStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
+// CHECK-LABEL: define dso_local void @_Z8NullStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: ret void
//
void NullStmt() {
@@ -110,33 +114,34 @@ void NullStmt() {
}
}
-// CHECK-LABEL: @_Z6IfStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END2:%.*]]
-// CHECK: if.then:
+// CHECK-LABEL: define dso_local void @_Z6IfStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END2:.*]]
+// CHECK: [[IF_THEN]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv()
-// CHECK-NEXT: br i1 [[CALL]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then1:
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: br label [[IF_END2]]
-// CHECK: if.end2:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK-NEXT: br i1 [[TOBOOL3]], label [[IF_THEN4:%.*]], label [[IF_END8:%.*]]
-// CHECK: if.then4:
+// CHECK-NEXT: br i1 [[CALL]], label %[[IF_THEN1:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN1]]:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: br label %[[IF_END2]]
+// CHECK: [[IF_END2]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV3]], label %[[IF_THEN4:.*]], label %[[IF_END8:.*]]
+// CHECK: [[IF_THEN4]]:
// CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: [[CALL5_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL5]], i1 false)
-// CHECK-NEXT: br i1 [[CALL5_EXPVAL]], label [[IF_THEN6:%.*]], label [[IF_END7:%.*]]
-// CHECK: if.then6:
-// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END7]]
-// CHECK: if.end7:
-// CHECK-NEXT: br label [[IF_END8]]
-// CHECK: if.end8:
+// CHECK-NEXT: br i1 [[CALL5_EXPVAL]], label %[[IF_THEN6:.*]], label %[[IF_END7:.*]]
+// CHECK: [[IF_THEN6]]:
+// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END7]]
+// CHECK: [[IF_END7]]:
+// CHECK-NEXT: br label %[[IF_END8]]
+// CHECK: [[IF_END8]]:
// CHECK-NEXT: ret void
//
void IfStmt() {
@@ -149,37 +154,38 @@ void IfStmt() {
}
}
-// CHECK-LABEL: @_Z9WhileStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[WHILE_COND:%.*]]
-// CHECK: while.cond:
+// CHECK-LABEL: define dso_local void @_Z9WhileStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[WHILE_COND:.*]]
+// CHECK: [[WHILE_COND]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv()
-// CHECK-NEXT: br i1 [[CALL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK: while.body:
-// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK: while.end:
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]]
-// CHECK: if.then2:
-// CHECK-NEXT: br label [[WHILE_COND3:%.*]]
-// CHECK: while.cond3:
+// CHECK-NEXT: br i1 [[CALL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]]
+// CHECK: [[WHILE_BODY]]:
+// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK: [[WHILE_END]]:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]]
+// CHECK: [[IF_THEN2]]:
+// CHECK-NEXT: br label %[[WHILE_COND3:.*]]
+// CHECK: [[WHILE_COND3]]:
// CHECK-NEXT: [[CALL4:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false)
-// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label [[WHILE_BODY5:%.*]], label [[WHILE_END6:%.*]]
-// CHECK: while.body5:
-// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_COND3]], !llvm.loop [[LOOP10:![0-9]+]]
-// CHECK: while.end6:
-// CHECK-NEXT: br label [[IF_END7]]
-// CHECK: if.end7:
+// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[WHILE_BODY5:.*]], label %[[WHILE_END6:.*]]
+// CHECK: [[WHILE_BODY5]]:
+// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_COND3]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK: [[WHILE_END6]]:
+// CHECK-NEXT: br label %[[IF_END7]]
+// CHECK: [[IF_END7]]:
// CHECK-NEXT: ret void
//
void WhileStmt() {
@@ -191,35 +197,36 @@ void WhileStmt() {
[[unlikely]] { b = false; }
}
-// CHECK-LABEL: @_Z6DoStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[DO_BODY:%.*]]
-// CHECK: do.body:
-// CHECK-NEXT: br label [[DO_COND:%.*]]
-// CHECK: do.cond:
+// CHECK-LABEL: define dso_local void @_Z6DoStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[DO_BODY:.*]]
+// CHECK: [[DO_BODY]]:
+// CHECK-NEXT: br label %[[DO_COND:.*]]
+// CHECK: [[DO_COND]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv()
-// CHECK-NEXT: br i1 [[CALL]], label [[DO_BODY]], label [[DO_END:%.*]], !llvm.loop [[LOOP11:![0-9]+]]
-// CHECK: do.end:
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]]
-// CHECK: if.then2:
-// CHECK-NEXT: br label [[DO_BODY3:%.*]]
-// CHECK: do.body3:
-// CHECK-NEXT: br label [[DO_COND4:%.*]]
-// CHECK: do.cond4:
+// CHECK-NEXT: br i1 [[CALL]], label %[[DO_BODY]], label %[[DO_END:.*]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK: [[DO_END]]:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]]
+// CHECK: [[IF_THEN2]]:
+// CHECK-NEXT: br label %[[DO_BODY3:.*]]
+// CHECK: [[DO_BODY3]]:
+// CHECK-NEXT: br label %[[DO_COND4:.*]]
+// CHECK: [[DO_COND4]]:
// CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z1Bv()
-// CHECK-NEXT: br i1 [[CALL5]], label [[DO_BODY3]], label [[DO_END6:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
-// CHECK: do.end6:
-// CHECK-NEXT: br label [[IF_END7]]
-// CHECK: if.end7:
+// CHECK-NEXT: br i1 [[CALL5]], label %[[DO_BODY3]], label %[[DO_END6:.*]], !llvm.loop [[LOOP13:![0-9]+]]
+// CHECK: [[DO_END6]]:
+// CHECK-NEXT: br label %[[IF_END7]]
+// CHECK: [[IF_END7]]:
// CHECK-NEXT: ret void
//
void DoStmt() {
@@ -234,36 +241,37 @@ void DoStmt() {
while (B());
}
-// CHECK-LABEL: @_Z7ForStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
+// CHECK-LABEL: define dso_local void @_Z7ForStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv()
-// CHECK-NEXT: br i1 [[CALL]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK: for.body:
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
-// CHECK: for.end:
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]]
-// CHECK: if.then2:
-// CHECK-NEXT: br label [[FOR_COND3:%.*]]
-// CHECK: for.cond3:
+// CHECK-NEXT: br i1 [[CALL]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK: [[FOR_BODY]]:
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK: [[FOR_END]]:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]]
+// CHECK: [[IF_THEN2]]:
+// CHECK-NEXT: br label %[[FOR_COND3:.*]]
+// CHECK: [[FOR_COND3]]:
// CHECK-NEXT: [[CALL4:%.*]] = call noundef zeroext i1 @_Z1Bv()
// CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false)
-// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label [[FOR_BODY5:%.*]], label [[FOR_END6:%.*]]
-// CHECK: for.body5:
-// CHECK-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP14:![0-9]+]]
-// CHECK: for.end6:
-// CHECK-NEXT: br label [[IF_END7]]
-// CHECK: if.end7:
+// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[FOR_BODY5:.*]], label %[[FOR_END6:.*]]
+// CHECK: [[FOR_BODY5]]:
+// CHECK-NEXT: br label %[[FOR_COND3]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK: [[FOR_END6]]:
+// CHECK-NEXT: br label %[[IF_END7]]
+// CHECK: [[IF_END7]]:
// CHECK-NEXT: ret void
//
void ForStmt() {
@@ -275,20 +283,21 @@ void ForStmt() {
[[unlikely]] {}
}
-// CHECK-LABEL: @_Z8GotoStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.end:
-// CHECK-NEXT: br label [[END]]
-// CHECK: end:
+// CHECK-LABEL: define dso_local void @_Z8GotoStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: br label %[[END]]
+// CHECK: [[END]]:
// CHECK-NEXT: ret void
//
void GotoStmt() {
@@ -301,18 +310,19 @@ void GotoStmt() {
end:;
}
-// CHECK-LABEL: @_Z10ReturnStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
+// CHECK-LABEL: define dso_local void @_Z10ReturnStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: ret void
//
void ReturnStmt() {
@@ -324,35 +334,36 @@ void ReturnStmt() {
}
}
-// CHECK-LABEL: @_Z10SwitchStmtv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA15:![0-9]+]]
-// CHECK-NEXT: switch i32 [[TMP1]], label [[SW_EPILOG:%.*]] [
+// CHECK-LABEL: define dso_local void @_Z10SwitchStmtv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false)
+// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16:![0-9]+]]
+// CHECK-NEXT: switch i32 [[TMP1]], label %[[SW_EPILOG:.*]] [
// CHECK-NEXT: ]
-// CHECK: sw.epilog:
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]]
-// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_ELSE4:%.*]]
-// CHECK: if.then2:
-// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA15]]
-// CHECK-NEXT: switch i32 [[TMP3]], label [[SW_EPILOG3:%.*]] [
+// CHECK: [[SW_EPILOG]]:
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]]
+// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_ELSE4:.*]]
+// CHECK: [[IF_THEN2]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16]]
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[SW_EPILOG3:.*]] [
// CHECK-NEXT: ]
-// CHECK: sw.epilog3:
-// CHECK-NEXT: br label [[IF_END5:%.*]]
-// CHECK: if.else4:
-// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[IF_END5]]
-// CHECK: if.end5:
+// CHECK: [[SW_EPILOG3]]:
+// CHECK-NEXT: br label %[[IF_END5:.*]]
+// CHECK: [[IF_ELSE4]]:
+// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]]
+// CHECK-NEXT: br label %[[IF_END5]]
+// CHECK: [[IF_END5]]:
// CHECK-NEXT: ret void
//
void SwitchStmt() {
@@ -371,3 +382,21 @@ void SwitchStmt() {
}
}
+//.
+// CHECK: [[BOOL_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"bool", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C++ TBAA"}
+// CHECK: [[RNG6]] = !{i8 0, i8 2}
+// CHECK: [[META7]] = !{}
+// CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]}
+// CHECK: [[META9]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META10]] = !{!"llvm.loop.unroll.disable"}
+// CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]}
+// CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]}
+// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META9]], [[META10]]}
+// CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META9]], [[META10]]}
+// CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]}
+// CHECK: [[INT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// CHECK: [[META17]] = !{!"int", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
index 151b77a..441faac 100644
--- a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
+++ b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
@@ -1,61 +1,64 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu -verify
// RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck %s
-// CHECK-LABEL: @_Z2wli(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z2wli(
+// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: br label [[WHILE_COND:%.*]]
-// CHECK: while.cond:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// CHECK-NEXT: br label %[[WHILE_COND:.*]]
+// CHECK: [[WHILE_COND]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 true)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK: while.body:
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]]
+// CHECK: [[WHILE_BODY]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP6:![0-9]+]]
-// CHECK: while.end:
+// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK: [[WHILE_END]]:
// CHECK-NEXT: ret void
//
void wl(int e){
while(e) [[likely]] ++e;
}
-// CHECK-LABEL: @_Z2wui(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z2wui(
+// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_COND:%.*]]
-// CHECK: while.cond:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_COND:.*]]
+// CHECK: [[WHILE_COND]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false)
-// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK: while.body:
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]]
+// CHECK: [[WHILE_BODY]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP9:![0-9]+]]
-// CHECK: while.end:
+// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK: [[WHILE_END]]:
// CHECK-NEXT: ret void
//
void wu(int e){
while(e) [[unlikely]] ++e;
}
-// CHECK-LABEL: @_Z15w_branch_elidedj(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z15w_branch_elidedj(
+// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-// CHECK: while.body:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_BODY:.*]]
+// CHECK: [[WHILE_BODY]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add i32 [[TMP0]], 1
-// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[WHILE_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[WHILE_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
//
void w_branch_elided(unsigned e){
// expected-warning@+2 {{attribute 'likely' has no effect when annotating an infinite loop}}
@@ -63,31 +66,32 @@ void w_branch_elided(unsigned e){
while(1) [[likely]] ++e;
}
-// CHECK-LABEL: @_Z2flj(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z2flj(
+// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3:[0-9]+]]
-// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true)
-// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-// CHECK: for.cond.cleanup:
+// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+// CHECK: [[FOR_COND_CLEANUP]]:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_END:%.*]]
-// CHECK: for.body:
-// CHECK-NEXT: br label [[FOR_INC:%.*]]
-// CHECK: for.inc:
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: br label %[[FOR_END:.*]]
+// CHECK: [[FOR_BODY]]:
+// CHECK-NEXT: br label %[[FOR_INC:.*]]
+// CHECK: [[FOR_INC]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
-// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
-// CHECK: for.end:
+// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK: [[FOR_END]]:
// CHECK-NEXT: ret void
//
void fl(unsigned e)
@@ -95,31 +99,32 @@ void fl(unsigned e)
for(int i = 0; i != e; ++e) [[likely]];
}
-// CHECK-LABEL: @_Z2fui(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z2fui(
+// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false)
-// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-// CHECK: for.cond.cleanup:
+// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+// CHECK: [[FOR_COND_CLEANUP]]:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_END:%.*]]
-// CHECK: for.body:
-// CHECK-NEXT: br label [[FOR_INC:%.*]]
-// CHECK: for.inc:
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: br label %[[FOR_END:.*]]
+// CHECK: [[FOR_BODY]]:
+// CHECK-NEXT: br label %[[FOR_INC:.*]]
+// CHECK: [[FOR_INC]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
-// CHECK: for.end:
+// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK: [[FOR_END]]:
// CHECK-NEXT: ret void
//
void fu(int e)
@@ -127,62 +132,64 @@ void fu(int e)
for(int i = 0; i != e; ++e) [[unlikely]];
}
-// CHECK-LABEL: @_Z15f_branch_elidedv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
+// CHECK-LABEL: define dso_local void @_Z15f_branch_elidedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
//
void f_branch_elided()
{
for(;;) [[likely]];
}
-// CHECK-LABEL: @_Z3frlOA4_i(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z3frlOA4_i(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__END1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[E:%.*]], ptr [[E_ADDR]], align 8, !tbaa [[TBAA14:![0-9]+]]
+// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__RANGE1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17:![0-9]+]], !align [[META18:![0-9]+]]
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__BEGIN1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]]
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__END1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]]
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4
-// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true)
-// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-// CHECK: for.cond.cleanup:
+// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+// CHECK: [[FOR_COND_CLEANUP]]:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__END1]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__BEGIN1]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__RANGE1]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_END:%.*]]
-// CHECK: for.body:
+// CHECK-NEXT: br label %[[FOR_END:.*]]
+// CHECK: [[FOR_BODY]]:
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_INC:%.*]]
-// CHECK: for.inc:
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: br label %[[FOR_INC:.*]]
+// CHECK: [[FOR_INC]]:
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1
-// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
-// CHECK: for.end:
+// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK: [[FOR_END]]:
// CHECK-NEXT: ret void
//
void frl(int (&&e) [4])
@@ -190,54 +197,76 @@ void frl(int (&&e) [4])
for(int i : e) [[likely]];
}
-// CHECK-LABEL: @_Z3fruOA4_i(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z3fruOA4_i(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[__END1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[E:%.*]], ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__RANGE1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]]
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__BEGIN1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]]
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__END1]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]]
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4
-// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: br label [[FOR_COND:%.*]]
-// CHECK: for.cond:
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: br label %[[FOR_COND:.*]]
+// CHECK: [[FOR_COND]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false)
-// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-// CHECK: for.cond.cleanup:
+// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+// CHECK: [[FOR_COND_CLEANUP]]:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__END1]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__BEGIN1]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__RANGE1]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_END:%.*]]
-// CHECK: for.body:
+// CHECK-NEXT: br label %[[FOR_END:.*]]
+// CHECK: [[FOR_BODY]]:
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT: br label [[FOR_INC:%.*]]
-// CHECK: for.inc:
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT: br label %[[FOR_INC:.*]]
+// CHECK: [[FOR_INC]]:
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1
-// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
-// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
-// CHECK: for.end:
+// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]]
+// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK: [[FOR_END]]:
// CHECK-NEXT: ret void
//
void fru(int (&&e) [4])
{
for(int i : e) [[unlikely]];
}
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C++ TBAA"}
+// CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META7:![0-9]+]], [[META8:![0-9]+]]}
+// CHECK: [[META7]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META8]] = !{!"llvm.loop.unroll.disable"}
+// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META8]]}
+// CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]}
+// CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META8]]}
+// CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META7]], [[META8]]}
+// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META7]], [[META8]]}
+// CHECK: [[INTPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+// CHECK: [[META15]] = !{!"p1 int", [[META16:![0-9]+]], i64 0}
+// CHECK: [[META16]] = !{!"any pointer", [[META4]], i64 0}
+// CHECK: [[META17]] = !{}
+// CHECK: [[META18]] = !{i64 4}
+// CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META8]]}
+// CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]]}
+//.
diff --git a/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp
index 328d1bc..bb6f5bb 100644
--- a/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp
+++ b/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp
@@ -1,15 +1,16 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck %s
extern volatile int i;
-// CHECK-LABEL: @_Z8OneCaseLv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !6
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z8OneCaseLv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF6:![0-9]+]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void OneCaseL() {
@@ -18,18 +19,19 @@ void OneCaseL() {
}
}
-// CHECK-LABEL: @_Z8OneCaseUv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_BB:%.*]]
-// CHECK-NEXT: ], !prof !7
-// CHECK: sw.bb:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @_Z8OneCaseUv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_BB:.*]]
+// CHECK-NEXT: ], !prof [[PROF7:![0-9]+]]
+// CHECK: [[SW_BB]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void OneCaseU() {
@@ -38,14 +40,15 @@ void OneCaseU() {
}
}
-// CHECK-LABEL: @_Z10TwoCasesLNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !8
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z10TwoCasesLNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF8:![0-9]+]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesLN() {
@@ -55,14 +58,15 @@ void TwoCasesLN() {
}
}
-// CHECK-LABEL: @_Z10TwoCasesUNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !9
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z10TwoCasesUNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF9:![0-9]+]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesUN() {
@@ -72,14 +76,15 @@ void TwoCasesUN() {
}
}
-// CHECK-LABEL: @_Z10TwoCasesLUv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !10
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z10TwoCasesLUv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF10:![0-9]+]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesLU() {
@@ -89,20 +94,21 @@ void TwoCasesLU() {
}
}
-// CHECK-LABEL: @_Z20CasesFallthroughNNLNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_BB:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_BB]]
-// CHECK-NEXT: i32 3, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 4, label [[SW_BB1]]
-// CHECK-NEXT: ], !prof !11
-// CHECK: sw.bb:
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z20CasesFallthroughNNLNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_BB:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_BB]]
+// CHECK-NEXT: i32 3, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 4, label %[[SW_BB1]]
+// CHECK-NEXT: ], !prof [[PROF11:![0-9]+]]
+// CHECK: [[SW_BB]]:
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughNNLN() {
@@ -114,20 +120,21 @@ void CasesFallthroughNNLN() {
}
}
-// CHECK-LABEL: @_Z20CasesFallthroughNNUNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_BB:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_BB]]
-// CHECK-NEXT: i32 3, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 4, label [[SW_BB1]]
-// CHECK-NEXT: ], !prof !12
-// CHECK: sw.bb:
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z20CasesFallthroughNNUNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_BB:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_BB]]
+// CHECK-NEXT: i32 3, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 4, label %[[SW_BB1]]
+// CHECK-NEXT: ], !prof [[PROF12:![0-9]+]]
+// CHECK: [[SW_BB]]:
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughNNUN() {
@@ -139,29 +146,30 @@ void CasesFallthroughNNUN() {
}
}
-// CHECK-LABEL: @_Z28CasesFallthroughRangeSmallLNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_BB:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_BB]]
-// CHECK-NEXT: i32 3, label [[SW_BB]]
-// CHECK-NEXT: i32 4, label [[SW_BB]]
-// CHECK-NEXT: i32 5, label [[SW_BB]]
-// CHECK-NEXT: i32 102, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 103, label [[SW_BB2:%.*]]
-// CHECK-NEXT: i32 104, label [[SW_BB2]]
-// CHECK-NEXT: ], !prof !13
-// CHECK: sw.bb:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @_Z28CasesFallthroughRangeSmallLNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_BB:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_BB]]
+// CHECK-NEXT: i32 3, label %[[SW_BB]]
+// CHECK-NEXT: i32 4, label %[[SW_BB]]
+// CHECK-NEXT: i32 5, label %[[SW_BB]]
+// CHECK-NEXT: i32 102, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 103, label %[[SW_BB2:.*]]
+// CHECK-NEXT: i32 104, label %[[SW_BB2]]
+// CHECK-NEXT: ], !prof [[PROF13:![0-9]+]]
+// CHECK: [[SW_BB]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_BB2]]
-// CHECK: sw.bb2:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_BB2]]
+// CHECK: [[SW_BB2]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughRangeSmallLN() {
@@ -173,29 +181,30 @@ void CasesFallthroughRangeSmallLN() {
}
}
-// CHECK-LABEL: @_Z28CasesFallthroughRangeSmallUNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_BB:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_BB]]
-// CHECK-NEXT: i32 3, label [[SW_BB]]
-// CHECK-NEXT: i32 4, label [[SW_BB]]
-// CHECK-NEXT: i32 5, label [[SW_BB]]
-// CHECK-NEXT: i32 102, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 103, label [[SW_BB2:%.*]]
-// CHECK-NEXT: i32 104, label [[SW_BB2]]
-// CHECK-NEXT: ], !prof !14
-// CHECK: sw.bb:
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
+// CHECK-LABEL: define dso_local void @_Z28CasesFallthroughRangeSmallUNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_BB:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_BB]]
+// CHECK-NEXT: i32 3, label %[[SW_BB]]
+// CHECK-NEXT: i32 4, label %[[SW_BB]]
+// CHECK-NEXT: i32 5, label %[[SW_BB]]
+// CHECK-NEXT: i32 102, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 103, label %[[SW_BB2:.*]]
+// CHECK-NEXT: i32 104, label %[[SW_BB2]]
+// CHECK-NEXT: ], !prof [[PROF14:![0-9]+]]
+// CHECK: [[SW_BB]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_BB2]]
-// CHECK: sw.bb2:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_BB2]]
+// CHECK: [[SW_BB2]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughRangeSmallUN() {
@@ -207,23 +216,24 @@ void CasesFallthroughRangeSmallUN() {
}
}
-// CHECK-LABEL: @_Z29CasesFallthroughRangeLargeLLNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_CASERANGE:%.*]] [
-// CHECK-NEXT: i32 1003, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 104, label [[SW_BB1]]
-// CHECK-NEXT: ], !prof !8
-// CHECK: sw.bb:
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_EPILOG:%.*]]
-// CHECK: sw.caserange:
+// CHECK-LABEL: define dso_local void @_Z29CasesFallthroughRangeLargeLLNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_CASERANGE:.*]] [
+// CHECK-NEXT: i32 1003, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 104, label %[[SW_BB1]]
+// CHECK-NEXT: ], !prof [[PROF8]]
+// CHECK: [[SW_BB:.*]]:
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_EPILOG:.*]]
+// CHECK: [[SW_CASERANGE]]:
// CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 0
// CHECK-NEXT: [[INBOUNDS:%.*]] = icmp ule i32 [[TMP1]], 64
// CHECK-NEXT: [[INBOUNDS_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[INBOUNDS]], i1 true)
-// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label [[SW_BB:%.*]], label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label %[[SW_BB]], label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughRangeLargeLLN() {
@@ -234,23 +244,24 @@ void CasesFallthroughRangeLargeLLN() {
}
}
-// CHECK-LABEL: @_Z29CasesFallthroughRangeLargeUUNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_CASERANGE:%.*]] [
-// CHECK-NEXT: i32 1003, label [[SW_BB1:%.*]]
-// CHECK-NEXT: i32 104, label [[SW_BB1]]
-// CHECK-NEXT: ], !prof !9
-// CHECK: sw.bb:
-// CHECK-NEXT: br label [[SW_BB1]]
-// CHECK: sw.bb1:
-// CHECK-NEXT: br label [[SW_EPILOG:%.*]]
-// CHECK: sw.caserange:
+// CHECK-LABEL: define dso_local void @_Z29CasesFallthroughRangeLargeUUNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_CASERANGE:.*]] [
+// CHECK-NEXT: i32 1003, label %[[SW_BB1:.*]]
+// CHECK-NEXT: i32 104, label %[[SW_BB1]]
+// CHECK-NEXT: ], !prof [[PROF9]]
+// CHECK: [[SW_BB:.*]]:
+// CHECK-NEXT: br label %[[SW_BB1]]
+// CHECK: [[SW_BB1]]:
+// CHECK-NEXT: br label %[[SW_EPILOG:.*]]
+// CHECK: [[SW_CASERANGE]]:
// CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 0
// CHECK-NEXT: [[INBOUNDS:%.*]] = icmp ule i32 [[TMP1]], 64
// CHECK-NEXT: [[INBOUNDS_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[INBOUNDS]], i1 false)
-// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label [[SW_BB:%.*]], label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label %[[SW_BB]], label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void CasesFallthroughRangeLargeUUN() {
@@ -261,15 +272,16 @@ void CasesFallthroughRangeLargeUUN() {
}
}
-// CHECK-LABEL: @_Z15OneCaseDefaultLv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]]
-// CHECK-NEXT: ], !prof !15
-// CHECK: sw.default:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z15OneCaseDefaultLv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]]
+// CHECK-NEXT: ], !prof [[PROF15:![0-9]+]]
+// CHECK: [[SW_DEFAULT]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void OneCaseDefaultL() {
@@ -279,15 +291,16 @@ void OneCaseDefaultL() {
}
}
-// CHECK-LABEL: @_Z15OneCaseDefaultUv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]]
-// CHECK-NEXT: ], !prof !16
-// CHECK: sw.default:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z15OneCaseDefaultUv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]]
+// CHECK-NEXT: ], !prof [[PROF16:![0-9]+]]
+// CHECK: [[SW_DEFAULT]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void OneCaseDefaultU() {
@@ -297,16 +310,17 @@ void OneCaseDefaultU() {
}
}
-// CHECK-LABEL: @_Z18TwoCasesDefaultLNLv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !17
-// CHECK: sw.default:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNLv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF17:![0-9]+]]
+// CHECK: [[SW_DEFAULT]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesDefaultLNL() {
@@ -317,16 +331,17 @@ void TwoCasesDefaultLNL() {
}
}
-// CHECK-LABEL: @_Z18TwoCasesDefaultLNNv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !8
-// CHECK: sw.default:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNNv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF8]]
+// CHECK: [[SW_DEFAULT]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesDefaultLNN() {
@@ -337,16 +352,17 @@ void TwoCasesDefaultLNN() {
}
}
-// CHECK-LABEL: @_Z18TwoCasesDefaultLNUv(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]]
-// CHECK-NEXT: i32 2, label [[SW_EPILOG]]
-// CHECK-NEXT: ], !prof !18
-// CHECK: sw.default:
-// CHECK-NEXT: br label [[SW_EPILOG]]
-// CHECK: sw.epilog:
+// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNUv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]]
+// CHECK-NEXT: i32 2, label %[[SW_EPILOG]]
+// CHECK-NEXT: ], !prof [[PROF18:![0-9]+]]
+// CHECK: [[SW_DEFAULT]]:
+// CHECK-NEXT: br label %[[SW_EPILOG]]
+// CHECK: [[SW_EPILOG]]:
// CHECK-NEXT: ret void
//
void TwoCasesDefaultLNU() {
@@ -356,3 +372,22 @@ void TwoCasesDefaultLNU() {
[[unlikely]] default: break;
}
}
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C++ TBAA"}
+// CHECK: [[PROF6]] = !{!"branch_weights", i32 357913942, i32 715827883}
+// CHECK: [[PROF7]] = !{!"branch_weights", i32 536870912, i32 1}
+// CHECK: [[PROF8]] = !{!"branch_weights", i32 238609295, i32 715827883, i32 238609295}
+// CHECK: [[PROF9]] = !{!"branch_weights", i32 357913942, i32 1, i32 357913942}
+// CHECK: [[PROF10]] = !{!"branch_weights", i32 357913942, i32 715827883, i32 1}
+// CHECK: [[PROF11]] = !{!"branch_weights", i32 143165577, i32 143165577, i32 143165577, i32 715827883, i32 143165577}
+// CHECK: [[PROF12]] = !{!"branch_weights", i32 214748365, i32 214748365, i32 214748365, i32 1, i32 214748365}
+// CHECK: [[PROF13]] = !{!"branch_weights", i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 715827883, i32 79536432}
+// CHECK: [[PROF14]] = !{!"branch_weights", i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 1, i32 119304648}
+// CHECK: [[PROF15]] = !{!"branch_weights", i32 715827883, i32 357913942}
+// CHECK: [[PROF16]] = !{!"branch_weights", i32 1, i32 536870912}
+// CHECK: [[PROF17]] = !{!"branch_weights", i32 536870912, i32 536870912, i32 268435456}
+// CHECK: [[PROF18]] = !{!"branch_weights", i32 1, i32 715827883, i32 357913942}
+//.
diff --git a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp
index c1ee5af..d4b4f30 100644
--- a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp
+++ b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
// with MERGE/NO-MERGE assertions added manually.
// N.B. although the clang driver defaults to merge, clang_cc1 defaults to non-merge.
@@ -29,7 +29,7 @@ void f(S *s, void (S::*p)()) {
// NO-MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0
// NO-MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]]
// NO-MERGE: [[MEMPTR_VIRTUAL]]:
-// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]]
// NO-MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]]
// NO-MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1
// NO-MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]]
@@ -49,7 +49,7 @@ void f(S *s, void (S::*p)()) {
// NO-MERGE: [[MEMPTR_VIRTUAL7]]:
// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]]
// NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5:[0-9]+]]
-// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]]
+// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]]
// NO-MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE8]], i64 [[P_COERCE0]]
// NO-MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1
// NO-MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]]
@@ -60,7 +60,7 @@ void f(S *s, void (S::*p)()) {
// NO-MERGE: [[MEMPTR_VIRTUAL19]]:
// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN9:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]]
// NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN9]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5]]
-// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]]
+// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]]
// NO-MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE20]], i64 [[P_COERCE0]]
// NO-MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1
// NO-MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]]
@@ -84,7 +84,7 @@ void f(S *s, void (S::*p)()) {
// MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0
// MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]]
// MERGE: [[MEMPTR_VIRTUAL]]:
-// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]]
// MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]]
// MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1
// MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]]
@@ -101,7 +101,7 @@ void f(S *s, void (S::*p)()) {
// MERGE: [[MEMPTR_VIRTUAL6]]:
// MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]]
// MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4:[0-9]+]]
-// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]]
+// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]]
// MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE7]], i64 [[P_COERCE0]]
// MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1
// MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]]
@@ -109,7 +109,7 @@ void f(S *s, void (S::*p)()) {
// MERGE: [[MEMPTR_VIRTUAL17]]:
// MERGE-NEXT: [[MEMPTR_VIRTUALFN8:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]]
// MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN8]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]]
-// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]]
+// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]]
// MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE18]], i64 [[P_COERCE0]]
// MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1
// MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]]
@@ -123,9 +123,17 @@ void f(S *s, void (S::*p)()) {
// MERGE-NEXT: [[TMP14:%.*]] = phi ptr [ [[MEMPTR_VIRTUALFN19]], %[[MEMPTR_VIRTUAL17]] ], [ [[MEMPTR_NONVIRTUALFN]], %[[MEMPTR_NONVIRTUAL21]] ]
// MERGE-NEXT: tail call void [[TMP14]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]]
// MERGE-NEXT: ret void
-
-// MERGE: [[ATTR3]] = { noreturn nounwind }
-// MERGE: [[ATTR4]] = { nounwind }
-
-// NO-MERGE: [[ATTR4]] = { nomerge noreturn nounwind }
-// NO-MERGE: [[ATTR5]] = { nounwind }
+//
+//.
+// NO-MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// NO-MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0}
+// NO-MERGE: [[META4]] = !{!"Simple C++ TBAA"}
+// NO-MERGE: [[META5]] = !{}
+// NO-MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1}
+//.
+// MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0}
+// MERGE: [[META4]] = !{!"Simple C++ TBAA"}
+// MERGE: [[META5]] = !{}
+// MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1}
+//.
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
index 5f83545..ab3695a 100644
--- a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: webassembly-registered-target
// Simple calls to known variadic functions that are completely elided when
@@ -33,32 +33,32 @@ template <typename X, typename Y> static Y second(...) {
extern "C" {
-// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
-// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @first_pair_i32(
+// CHECK-SAME: i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[X]]
//
int first_pair_i32(int x, int y) { return first<int, int>(x, y); }
-// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
-// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @second_pair_i32(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[Y]]
//
int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
-// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
-// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef double @first_pair_f64(
+// CHECK-SAME: double noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret double [[X]]
//
double first_pair_f64(double x, double y) {
return first<double, double>(x, y);
}
-// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
-// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef double @second_pair_f64(
+// CHECK-SAME: double noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret double [[Y]]
//
double second_pair_f64(double x, double y) {
@@ -68,30 +68,30 @@ double second_pair_f64(double x, double y) {
extern "C" {
-// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
-// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @first_i32_f64(
+// CHECK-SAME: i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[X]]
//
int first_i32_f64(int x, double y) { return first<int, double>(x, y); }
-// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
-// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef double @second_i32_f64(
+// CHECK-SAME: i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret double [[Y]]
//
double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
-// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
-// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef double @first_f64_i32(
+// CHECK-SAME: double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret double [[X]]
//
double first_f64_i32(double x, int y) { return first<double, int>(x, y); }
-// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
-// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @second_f64_i32(
+// CHECK-SAME: double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[Y]]
//
int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
@@ -100,38 +100,38 @@ int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
extern "C" {
typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
-// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
-// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @first_i32_ulong2(
+// CHECK-SAME: i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[X]]
//
int first_i32_ulong2(int x, ulong2 *y) { return first<int, ulong2>(x, *y); }
-// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
-// CHECK-SAME: (i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define void @second_i32_ulong2(
+// CHECK-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[INT_TBAA2:![0-9]+]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: ret void
//
void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
*r = second<int, ulong2>(x, *y);
}
-// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
-// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-LABEL: define void @first_ulong2_i32(
+// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: ret void
//
void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
*r = first<ulong2, int>(*x, y);
}
-// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
-// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @second_ulong2_i32(
+// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[Y]]
//
int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
@@ -149,33 +149,38 @@ typedef struct {
extern "C" {
-// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
-// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @first_i32_asc(
+// CHECK-SAME: i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[X]]
//
int first_i32_asc(int x, asc *y) { return first<int, asc>(x, *y); }
-// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
-// CHECK-SAME: (i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define void @second_i32_asc(
+// CHECK-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
// CHECK-NEXT: ret void
//
void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
-// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
-// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define void @first_asc_i32(
+// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
// CHECK-NEXT: ret void
//
void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc, int>(*x, y); }
-// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
-// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define noundef i32 @second_asc_i32(
+// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret i32 [[Y]]
//
int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
}
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenCXX/load-reference-metadata.cpp b/clang/test/CodeGenCXX/load-reference-metadata.cpp
index daceb75..abfdd05 100644
--- a/clang/test/CodeGenCXX/load-reference-metadata.cpp
+++ b/clang/test/CodeGenCXX/load-reference-metadata.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -std=c++11 -O1 -disable-llvm-passes %s -o - | FileCheck %s
struct alignas(32) F { int x; };
@@ -13,20 +13,20 @@ struct S {
// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[TBAA9:![0-9]+]], !nonnull [[META7]]
-// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[TBAA14:![0-9]+]]
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[CHARPTR_TBAA9:![0-9]+]], !nonnull [[META7]]
+// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[CHAR_TBAA14:![0-9]+]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7]], !align [[META8]]
// CHECK-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP2]], i32 0, i32 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[TBAA15:![0-9]+]], !nonnull [[META7]], !align [[META16:![0-9]+]]
-// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[TBAA17:![0-9]+]]
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[INTPTR_TBAA15:![0-9]+]], !nonnull [[META7]], !align [[META16:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA17:![0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7]], !align [[META8]]
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i32 0, i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[TBAA19:![0-9]+]], !nonnull [[META7]], !align [[META20:![0-9]+]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[_ZTS1FPTR_TBAA19:![0-9]+]], !nonnull [[META7]], !align [[META20:![0-9]+]]
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_F:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK-NEXT: store i32 0, ptr [[X]], align 32, !tbaa [[TBAA21:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr [[X]], align 32, !tbaa [[INT_TBAA21:![0-9]+]]
// CHECK-NEXT: ret void
//
void test(S &s) {
@@ -42,13 +42,13 @@ extern B (&bb)[2];
// CHECK-LABEL: define dso_local void @_Z13test_externalv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[TBAA23:![0-9]+]], !nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[_ZTS1BPTR_TBAA23:![0-9]+]], !nonnull [[META7]], !align [[META8]]
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2
-// CHECK-NEXT: store i8 0, ptr [[C]], align 8, !tbaa [[TBAA25:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[TBAA23]], !nonnull [[META7]], !align [[META20]]
+// CHECK-NEXT: store i8 0, ptr [[C]], align 8, !tbaa [[CHAR_TBAA25:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META20]]
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x %struct.B], ptr [[TMP1]], i64 0, i64 0
// CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[ARRAYIDX]], i32 0, i32 2
-// CHECK-NEXT: store i8 0, ptr [[C1]], align 16, !tbaa [[TBAA25]]
+// CHECK-NEXT: store i8 0, ptr [[C1]], align 16, !tbaa [[CHAR_TBAA25]]
// CHECK-NEXT: ret void
//
void test_external() {
@@ -60,8 +60,8 @@ void test_external() {
// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(17) [[S:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA23]], !nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META8]]
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2
// CHECK-NEXT: ret ptr [[C]]
//
@@ -69,30 +69,30 @@ char* test_deref_only(B &s) {
return &s.c;
}
//.
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[_ZTS1SPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"p1 _ZTS1S", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
// CHECK: [[META7]] = !{}
// CHECK: [[META8]] = !{i64 8}
-// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], i64 0}
+// CHECK: [[CHARPTR_TBAA9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], i64 0}
// CHECK: [[META10]] = !{!"_ZTS1S", [[META11]], i64 0, [[META12:![0-9]+]], i64 8, [[META13:![0-9]+]], i64 16}
// CHECK: [[META11]] = !{!"p1 omnipotent char", [[META4]], i64 0}
// CHECK: [[META12]] = !{!"p1 int", [[META4]], i64 0}
// CHECK: [[META13]] = !{!"p1 _ZTS1F", [[META4]], i64 0}
-// CHECK: [[TBAA14]] = !{[[META5]], [[META5]], i64 0}
-// CHECK: [[TBAA15]] = !{[[META10]], [[META12]], i64 8}
+// CHECK: [[CHAR_TBAA14]] = !{[[META5]], [[META5]], i64 0}
+// CHECK: [[INTPTR_TBAA15]] = !{[[META10]], [[META12]], i64 8}
// CHECK: [[META16]] = !{i64 4}
-// CHECK: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// CHECK: [[META18]] = !{!"int", [[META5]], i64 0}
-// CHECK: [[TBAA19]] = !{[[META10]], [[META13]], i64 16}
+// CHECK: [[_ZTS1FPTR_TBAA19]] = !{[[META10]], [[META13]], i64 16}
// CHECK: [[META20]] = !{i64 32}
-// CHECK: [[TBAA21]] = !{[[META22:![0-9]+]], [[META18]], i64 0}
+// CHECK: [[INT_TBAA21]] = !{[[META22:![0-9]+]], [[META18]], i64 0}
// CHECK: [[META22]] = !{!"_ZTS1F", [[META18]], i64 0}
-// CHECK: [[TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0}
+// CHECK: [[_ZTS1BPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0}
// CHECK: [[META24]] = !{!"p1 _ZTS1B", [[META4]], i64 0}
-// CHECK: [[TBAA25]] = !{[[META26:![0-9]+]], [[META5]], i64 16}
+// CHECK: [[CHAR_TBAA25]] = !{[[META26:![0-9]+]], [[META5]], i64 16}
// CHECK: [[META26]] = !{!"_ZTS1B", [[META27:![0-9]+]], i64 8, [[META5]], i64 16}
// CHECK: [[META27]] = !{!"long long", [[META5]], i64 0}
//.
diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl
index f9d7968..b55f663 100644
--- a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl
+++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
@@ -11,9 +11,9 @@ AA getAA(void *p);
__amdgpu_buffer_rsrc_t getBufferImpl(void *p);
void consumeBuffer(__amdgpu_buffer_rsrc_t);
-// CHECK-LABEL: define {{[^@]+}}@getBuffer
-// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local ptr addrspace(8) @getBuffer(
+// CHECK-SAME: ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret ptr addrspace(8) [[CALL]]
//
@@ -21,16 +21,16 @@ __amdgpu_buffer_rsrc_t getBuffer(void *p) {
return getBufferImpl(p);
}
-// CHECK-LABEL: define {{[^@]+}}@consumeBufferPtr
-// CHECK-SAME: (ptr addrspace(5) noundef readonly captures(address) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @consumeBufferPtr(
+// CHECK-SAME: ptr addrspace(5) noundef readonly captures(address) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(5) [[P]], addrspacecast (ptr null to ptr addrspace(5))
-// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA4:![0-9]+]]
// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP0]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: ret void
//
void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
@@ -38,20 +38,20 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
consumeBuffer(*p);
}
-// CHECK-LABEL: define {{[^@]+}}@test
-// CHECK-SAME: (ptr addrspace(5) noundef readonly captures(address) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-LABEL: define dso_local void @test(
+// CHECK-SAME: ptr addrspace(5) noundef readonly captures(address) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[INT_TBAA8:![0-9]+]]
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5))
// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]]
-// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]]
-// CHECK: if.then.i:
+// CHECK-NEXT: br i1 [[OR_COND]], label %[[IF_END:.*]], label %[[IF_THEN_I:.*]]
+// CHECK: [[IF_THEN_I]]:
// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA4]]
// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP1]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
// CHECK-NEXT: ret void
//
void test(AA *a) {
@@ -59,18 +59,18 @@ void test(AA *a) {
consumeBufferPtr(&(a->r));
}
-// CHECK-LABEL: define {{[^@]+}}@bar
-// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local %struct.AA_ty @bar(
+// CHECK-SAME: ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CALL:%.*]] = tail call [[STRUCT_AA_TY:%.*]] @[[GETAA:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr addrspace(5) noundef [[P]]) #[[ATTR2]]
// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_AA_TY]] [[CALL]], 0
// CHECK-NEXT: [[CALL_I:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2]]
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
-// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[TEST_EXIT:%.*]], label [[IF_THEN_I_I:%.*]]
-// CHECK: if.then.i.i:
+// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[TEST_EXIT:.*]], label %[[IF_THEN_I_I:.*]]
+// CHECK: [[IF_THEN_I_I]]:
// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[CALL_I]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[TEST_EXIT]]
-// CHECK: test.exit:
+// CHECK-NEXT: br label %[[TEST_EXIT]]
+// CHECK: [[TEST_EXIT]]:
// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_AA_TY]] [[CALL]], ptr addrspace(8) [[CALL_I]], 1
// CHECK-NEXT: ret [[STRUCT_AA_TY]] [[DOTFCA_1_INSERT]]
//
@@ -80,3 +80,12 @@ AA bar(void *p) {
test(&a);
return a;
}
+//.
+// CHECK: [[__AMDGPU_BUFFER_RSRC_T_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK: [[META5]] = !{!"__amdgpu_buffer_rsrc_t", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[INT_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0}
+// CHECK: [[META9]] = !{!"AA_ty", [[META10]], i64 0, [[META5]], i64 16}
+// CHECK: [[META10]] = !{!"int", [[META6]], i64 0}
+//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index d71c898..6d57323 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 6
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU
// // Check no-optnone and target-cpu behavior
@@ -451,13 +451,13 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
// GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]]
-// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]]
+// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7:![0-9]+]]
+// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
-// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: ret void
//
//
@@ -473,14 +473,14 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
-// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]]
-// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]]
-// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
-// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
-// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14:![0-9]+]]
+// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16:![0-9]+]]
+// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]]
+// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]]
// GFX900-NEXT: ret void
//
@@ -519,16 +519,16 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
-// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
-// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
-// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]]
+// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17:![0-9]+]]
+// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17:![0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
-// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19:![0-9]+]]
-// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19:![0-9]+]]
+// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
@@ -537,14 +537,14 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
// GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
-// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
-// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
-// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]])
-// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]]
+// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
@@ -553,20 +553,20 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
// GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
-// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
-// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
-// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
-// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]]
+// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[LONGPTR_TBAA7]]
// GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
-// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]])
-// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]]
+// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
@@ -575,17 +575,17 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
// GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
-// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
-// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
-// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
-// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]]
+// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[LONGPTR_TBAA7]]
// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
-// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0
// GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8
@@ -599,16 +599,16 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
// GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
-// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
-// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]]
-// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[LONGPTR_TBAA7]]
+// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]]
+// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]]
+// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
-// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]])
// GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
@@ -623,8 +623,8 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
-// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26:![0-9]+]]
-// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]]
+// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26:![0-9]+]]
+// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26]]
// GFX900-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]]
// GFX900-NEXT: ret void
//
@@ -640,14 +640,14 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]]
+// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]]
+// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
@@ -664,11 +664,11 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
-// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
-// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
+// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: ret void
//
//
@@ -691,17 +691,17 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
-// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
-// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
+// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
-// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
-// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
+// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA7]]
// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
-// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: ret void
//
//
@@ -725,22 +725,22 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
-// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32:![0-9]+]]
+// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[ANYPTR_TBAA32:![0-9]+]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
-// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]]
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
-// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
+// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
-// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
-// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
+// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA7]]
// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
-// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
-// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32]]
+// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]]
+// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[ANYPTR_TBAA32]]
// GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
-// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA17]]
// GFX900-NEXT: ret void
//
//
@@ -763,9 +763,9 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
-// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
+// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[LONG_TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[LONGPTR_TBAA7]]
// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]]
// GFX900-NEXT: ret void
//
@@ -852,36 +852,36 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
-// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// GFX900: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
// GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// GFX900: [[META6]] = !{!"Simple C/C++ TBAA"}
-// GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// GFX900: [[LONGPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
// GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0}
// GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0}
// GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0}
// GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"}
// GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"}
// GFX900: [[META13]] = !{!"", !"", !"", !""}
-// GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+// GFX900: [[CHARPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
// GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0}
-// GFX900: [[TBAA16]] = !{[[META5]], [[META5]], i64 0}
-// GFX900: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// GFX900: [[CHAR_TBAA16]] = !{[[META5]], [[META5]], i64 0}
+// GFX900: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// GFX900: [[META18]] = !{!"int", [[META5]], i64 0}
-// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// GFX900: [[QUEUE_T_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
// GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0}
-// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]}
+// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[INT_TBAA17]]}
// GFX900: [[META22]] = !{i32 1}
// GFX900: [[META23]] = !{!"none"}
// GFX900: [[META24]] = !{!"int*"}
// GFX900: [[META25]] = !{!""}
-// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
+// GFX900: [[INTPTR_TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
// GFX900: [[META27]] = !{!"p1 int", [[META9]], i64 0}
// GFX900: [[META28]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle}
// GFX900: [[META29]] = !{i32 0}
// GFX900: [[META30]] = !{!"__block_literal"}
// GFX900: [[META31]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle}
-// GFX900: [[TBAA32]] = !{[[META9]], [[META9]], i64 0}
+// GFX900: [[ANYPTR_TBAA32]] = !{[[META9]], [[META9]], i64 0}
// GFX900: [[META33]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle}
// GFX900: [[META34]] = !{i32 0, i32 3}
// GFX900: [[META35]] = !{!"none", !"none"}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
index b9e2517..cea7ee5 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
@@ -17,60 +17,60 @@ __kernel void test_printf_str_int(int i) {
}
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs(
// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
+// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR6]]
+// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
+// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
-// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
-// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
-// CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[S]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
+// CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[S]]) #[[ATTR6:[0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[S]]) #[[ATTR7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]]
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR5]]
+// CHECK-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[S]]) #[[ATTR6]]
// CHECK-NEXT: ret void
//
//.
@@ -79,7 +79,7 @@ __kernel void test_printf_str_int(int i) {
// CHECK: [[META6]] = !{!"none"}
// CHECK: [[META7]] = !{!"int"}
// CHECK: [[META8]] = !{!""}
-// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// CHECK: [[INT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
// CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0}
// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
index cddc323..321835c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
@@ -14,10 +14,11 @@ typedef int v8i __attribute__((ext_vector_type(8)));
// amdgcn_wmma_f32_16x16x16_f16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)
@@ -29,10 +30,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)
// amdgcn_wmma_f32_16x16x16_bf16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c)
@@ -44,10 +46,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c
// amdgcn_wmma_f16_16x16x16_f16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x half> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c)
@@ -59,10 +62,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c)
// amdgcn_wmma_bf16_16x16x16_bf16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x i16> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s c)
@@ -74,10 +78,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s
// amdgcn_wmma_i32_16x16x16_iu8
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)
@@ -89,10 +94,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)
// amdgcn_wmma_i32_16x16x16_iu4
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c)
@@ -100,10 +106,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c)
*out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12(true, a, true, b, c, false);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c)
@@ -111,10 +118,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8
*out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c)
@@ -122,10 +130,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8
*out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c)
@@ -133,10 +142,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8
*out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c)
@@ -144,13 +154,19 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8
*out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v8i* out, v2i a, v2i b, v8i c)
{
*out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12(true, a, true, b, c, false);
}
+//.
+// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
index 1c1d273..8b5b315 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
@@ -13,10 +13,11 @@ typedef int v4i __attribute__((ext_vector_type(4)));
// amdgcn_wmma_f32_16x16x16_f16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)
@@ -28,10 +29,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)
// amdgcn_wmma_f32_16x16x16_bf16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c)
@@ -43,10 +45,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c
// amdgcn_wmma_f16_16x16x16_f16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x half> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x half> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c)
@@ -58,10 +61,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c)
// amdgcn_wmma_bf16_16x16x16_bf16
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x i16> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s c)
@@ -73,10 +77,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s
// amdgcn_wmma_i32_16x16x16_iu8
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)
@@ -88,10 +93,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)
// amdgcn_wmma_i32_16x16x16_iu4
//
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c)
@@ -99,10 +105,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c)
*out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12(true, a, true, b, c, false);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4f c)
@@ -110,10 +117,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4
*out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4f c)
@@ -121,10 +129,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4
*out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4f c)
@@ -132,10 +141,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4
*out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4f c)
@@ -143,13 +153,19 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4
*out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12(a, b, c);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v4i* out, int a, int b, v4i c)
{
*out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12(true, a, true, b, c, false);
}
+//.
+// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
index c645d52..e03ae66 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
@@ -1,13 +1,14 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));
-// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b8(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]])
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b8(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]])
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_cluster_load_async_to_lds_b8(global char* gaddr, local char* laddr, int mask)
@@ -15,9 +16,10 @@ void test_amdgcn_cluster_load_async_to_lds_b8(global char* gaddr, local char* la
__builtin_amdgcn_cluster_load_async_to_lds_b8(gaddr, laddr, 16, 0, mask);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b32(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]])
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b32(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]])
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_cluster_load_async_to_lds_b32(global int* gaddr, local int* laddr, int mask)
@@ -25,9 +27,10 @@ void test_amdgcn_cluster_load_async_to_lds_b32(global int* gaddr, local int* lad
__builtin_amdgcn_cluster_load_async_to_lds_b32(gaddr, laddr, 16, 0, mask);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b64(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]])
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b64(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]])
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_cluster_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr, int mask)
@@ -35,9 +38,10 @@ void test_amdgcn_cluster_load_async_to_lds_b64(global v2i* gaddr, local v2i* lad
__builtin_amdgcn_cluster_load_async_to_lds_b64(gaddr, laddr, 16, 0, mask);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b128(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]])
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b128(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]])
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_cluster_load_async_to_lds_b128(global v4i* gaddr, local v4i* laddr, int mask)
@@ -45,9 +49,10 @@ void test_amdgcn_cluster_load_async_to_lds_b128(global v4i* gaddr, local v4i* la
__builtin_amdgcn_cluster_load_async_to_lds_b128(gaddr, laddr, 16, 0, mask);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b8(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b8(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* laddr)
@@ -55,9 +60,10 @@ void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* la
__builtin_amdgcn_global_load_async_to_lds_b8(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b32(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b32(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* laddr)
@@ -65,9 +71,10 @@ void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* ladd
__builtin_amdgcn_global_load_async_to_lds_b32(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b64(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b64(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr)
@@ -75,9 +82,10 @@ void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* ladd
__builtin_amdgcn_global_load_async_to_lds_b64(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b128(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b128(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* laddr)
@@ -85,9 +93,10 @@ void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* la
__builtin_amdgcn_global_load_async_to_lds_b128(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b8(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b8(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char* laddr)
@@ -95,9 +104,10 @@ void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char*
__builtin_amdgcn_global_store_async_from_lds_b8(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b32(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b32(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* laddr)
@@ -105,9 +115,10 @@ void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* l
__builtin_amdgcn_global_store_async_from_lds_b32(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b64(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b64(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* laddr)
@@ -115,9 +126,10 @@ void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* l
__builtin_amdgcn_global_store_async_from_lds_b64(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b128(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b128(
+// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i* laddr)
@@ -125,9 +137,10 @@ void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i*
__builtin_amdgcn_global_store_async_from_lds_b128(gaddr, laddr, 16, 0);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_async_barrier_arrive_b64(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR:%.*]])
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_ds_atomic_async_barrier_arrive_b64(
+// CHECK-GFX1250-SAME: ptr addrspace(3) noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR]])
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long* addr)
@@ -135,13 +148,20 @@ void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long* addr)
__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64(addr);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR:%.*]], i64 [[DATA:%.*]])
-// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT:%.*]], align 8, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(
+// CHECK-GFX1250-SAME: ptr addrspace(3) noundef captures(none) [[ADDR:%.*]], i64 noundef [[DATA:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// CHECK-GFX1250-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR]], i64 [[DATA]])
+// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT]], align 8, !tbaa [[LONG_TBAA4:![0-9]+]]
// CHECK-GFX1250-NEXT: ret void
//
void test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(local long* addr, long data, long *out)
{
*out = __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64(addr, data);
}
+//.
+// CHECK-GFX1250: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1250: [[META5]] = !{!"long", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1250: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK-GFX1250: [[META7]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl
index 9927bb33..2143901 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
@@ -13,10 +13,11 @@ typedef short v16s __attribute__((ext_vector_type(16)));
// Wave32
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_f16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index)
@@ -24,10 +25,11 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f
*out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index)
@@ -35,10 +37,11 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f16_16x16x32_f16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index)
@@ -46,10 +49,11 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h
*out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index)
@@ -57,10 +61,11 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v
*out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index)
@@ -68,10 +73,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i
*out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu4_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index)
@@ -79,10 +85,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i
*out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x64_iu4_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index)
@@ -90,10 +97,11 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i
*out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index)
@@ -101,10 +109,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index)
@@ -112,10 +121,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index)
@@ -123,13 +133,19 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index)
{
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index);
}
+//.
+// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl
index eaa6b14..47753af 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
@@ -12,10 +12,11 @@ typedef short v8s __attribute__((ext_vector_type(8)));
// Wave64
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_f16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, int index)
@@ -23,10 +24,11 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f
*out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, int index)
@@ -34,10 +36,11 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f16_16x16x32_f16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x half> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, int index)
@@ -45,10 +48,11 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h
*out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x i16> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, int index)
@@ -56,10 +60,11 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4
*out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, int index)
@@ -67,10 +72,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i
*out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu4_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, int index)
@@ -78,10 +84,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i
*out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true)
-// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x64_iu4_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true)
+// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, int index)
@@ -89,10 +96,11 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i
*out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, a, true, b, c, index, true);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index)
@@ -100,10 +108,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index)
@@ -111,10 +120,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index)
@@ -122,13 +132,19 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b,
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(a, b, c, index);
}
-// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(
-// CHECK-GFX1200-NEXT: entry:
-// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]])
-// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(
+// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]])
+// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index)
{
*out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(a, b, c, index);
}
+//.
+// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl
index 2f9a367..853cd32 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -DWMMA_GFX1100_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1100
@@ -17,10 +17,11 @@ typedef short v16s __attribute__((ext_vector_type(16)));
// amdgcn_wmma_f32_16x16x16_f16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]])
+// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f c)
@@ -32,10 +33,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f
// amdgcn_wmma_f32_16x16x16_bf16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]])
-// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]])
+// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, v16s b, v8f c)
@@ -47,10 +49,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, v16s b, v8f
// amdgcn_wmma_f16_16x16x16_f16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <16 x half> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16h c)
@@ -62,10 +65,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16
// amdgcn_wmma_bf16_16x16x16_bf16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i16> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v16s c)
@@ -77,10 +81,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v
// amdgcn_wmma_f16_16x16x16_f16_tied
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v16f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <16 x half> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b, v16h c)
@@ -92,10 +97,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b
// amdgcn_wmma_bf16_16x16x16_bf16_tied
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v16i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i16> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s b, v16s c)
@@ -107,10 +113,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s
// amdgcn_wmma_i32_16x16x16_iu8
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 true, <4 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 true, <4 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c)
@@ -122,10 +129,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c)
// amdgcn_wmma_i32_16x16x16_iu4
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c)
@@ -134,3 +142,8 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c)
}
#endif
+//.
+// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl
index 8dfe69b..9b6872f 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize64 -DWMMA_GFX1100_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1100
@@ -18,10 +18,11 @@ typedef short v16s __attribute__((ext_vector_type(16)));
// amdgcn_wmma_f32_16x16x16_f16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <4 x float> [[C]])
+// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f c)
@@ -33,10 +34,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f
// amdgcn_wmma_f32_16x16x16_bf16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <4 x float> [[C:%.*]])
-// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <4 x float> [[C]])
+// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, v16s b, v4f c)
@@ -48,10 +50,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, v16s b, v4f
// amdgcn_wmma_f16_16x16x16_f16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h c)
@@ -63,10 +66,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h
// amdgcn_wmma_bf16_16x16x16_bf16
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8s c)
@@ -78,10 +82,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8
// amdgcn_wmma_f16_16x16x16_f16_tied
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b, v8h c)
@@ -93,10 +98,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b,
// amdgcn_wmma_bf16_16x16x16_bf16_tied
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 true)
-// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true)
+// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s b, v8s c)
@@ -108,10 +114,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s
// amdgcn_wmma_i32_16x16x16_iu8
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 true, <4 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 true, <4 x i32> [[A]], i1 true, <4 x i32> [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c)
@@ -123,10 +130,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c)
// amdgcn_wmma_i32_16x16x16_iu4
//
-// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
-// CHECK-GFX1100-NEXT: entry:
-// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
-// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
+// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
+// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-GFX1100-NEXT: [[ENTRY:.*:]]
+// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]]
// CHECK-GFX1100-NEXT: ret void
//
void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c)
@@ -135,3 +143,8 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c)
}
#endif
+//.
+// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
index 4e40073..4f2a75a 100644
--- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
+++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// Check there's no assertion when passing a pointer to an address space
@@ -33,7 +33,7 @@ __kernel void use_of_local_var()
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X]]) #[[ATTR6:[0-9]+]]
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]]
@@ -46,7 +46,7 @@ __kernel void use_of_local_var()
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
-// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[TBAA4]]
+// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[INT_TBAA4]]
// CHECK-NEXT: [[X_ADDR_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5)
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ADDR_ASCAST_ASCAST]]) #[[ATTR6]]
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR6]]
@@ -68,7 +68,7 @@ __kernel void use_of_local_var()
// CHECK-NEXT: ret void
//
//.
-// CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0}
// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index e73657e..6e5c1c4 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
typedef char char3 __attribute__((ext_vector_type(3)));
@@ -12,8 +12,8 @@ typedef float float4 __attribute__((ext_vector_type(4)));
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]]
// CHECK-NEXT: ret void
//
void kernel foo(global float3 *a, global float3 *b) {
@@ -23,9 +23,9 @@ void kernel foo(global float3 *a, global float3 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void kernel float4_to_float3(global float3 *a, global float4 *b) {
@@ -36,8 +36,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void kernel float3_to_float4(global float3 *a, global float4 *b) {
@@ -49,7 +49,7 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void kernel float3_to_double2(global float3 *a, global double2 *b) {
@@ -59,9 +59,9 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void kernel char8_to_short3(global short3 *a, global char8 *b) {
@@ -69,10 +69,10 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) {
}
// CHECK-LABEL: define dso_local spir_func void @from_char3(
-// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
+// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA17:![0-9]+]]
// CHECK-NEXT: ret void
//
void from_char3(char3 a, global int *out) {
@@ -80,10 +80,10 @@ void from_char3(char3 a, global int *out) {
}
// CHECK-LABEL: define dso_local spir_func void @from_short3(
-// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]]
// CHECK-NEXT: ret void
//
void from_short3(short3 a, global long *out) {
@@ -91,11 +91,11 @@ void from_short3(short3 a, global long *out) {
}
// CHECK-LABEL: define dso_local spir_func void @scalar_to_char3(
-// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void scalar_to_char3(int a, global char3 *out) {
@@ -103,11 +103,11 @@ void scalar_to_char3(int a, global char3 *out) {
}
// CHECK-LABEL: define dso_local spir_func void @scalar_to_short3(
-// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]]
// CHECK-NEXT: ret void
//
void scalar_to_short3(long a, global short3 *out) {
@@ -120,7 +120,7 @@ void scalar_to_short3(long a, global short3 *out) {
// CHECK: [[META5]] = !{!"float3*", !"float3*"}
// CHECK: [[META6]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(3)))*"}
// CHECK: [[META7]] = !{!"", !""}
-// CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+// CHECK: [[CHAR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
// CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
// CHECK: [[META10]] = !{!"Simple C/C++ TBAA"}
// CHECK: [[META11]] = !{!"float3*", !"float4*"}
@@ -129,8 +129,8 @@ void scalar_to_short3(long a, global short3 *out) {
// CHECK: [[META14]] = !{!"float __attribute__((ext_vector_type(3)))*", !"double __attribute__((ext_vector_type(2)))*"}
// CHECK: [[META15]] = !{!"short3*", !"char8*"}
// CHECK: [[META16]] = !{!"short __attribute__((ext_vector_type(3)))*", !"char __attribute__((ext_vector_type(8)))*"}
-// CHECK: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// CHECK: [[META18]] = !{!"int", [[META9]], i64 0}
-// CHECK: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// CHECK: [[LONG_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
// CHECK: [[META20]] = !{!"long", [[META9]], i64 0}
//.
diff --git a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
index 8d8f0b0..e932e75 100644
--- a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
+++ b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
@@ -1,12 +1,12 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
//RUN: %clang_cc1 %s -triple spir -emit-llvm -O1 -o - | FileCheck %s
// CHECK-LABEL: define dso_local spir_kernel void @test(
// CHECK-SAME: ptr addrspace(1) noundef readonly align 8 captures(none) [[IN:%.*]], ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1_I]], align 8, !tbaa [[LONG_TBAA8:![0-9]+]]
+// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA8]]
// CHECK-NEXT: ret void
//
__kernel void test(__global long *In, __global long *Out) {
@@ -18,7 +18,7 @@ __kernel void test(__global long *In, __global long *Out) {
// CHECK: [[META5]] = !{!"none", !"none"}
// CHECK: [[META6]] = !{!"long*", !"long*"}
// CHECK: [[META7]] = !{!"", !""}
-// CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+// CHECK: [[LONG_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
// CHECK: [[META9]] = !{!"long", [[META10:![0-9]+]], i64 0}
// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0}
// CHECK: [[META11]] = !{!"Simple C++ TBAA"}
diff --git a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c
index 88e691d..6ca17e1 100644
--- a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c
+++ b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O2 -triple x86_64-linux-gnu -emit-llvm -o - %s \
// RUN: -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -debug-info-kind=limited \
// RUN: -fsanitize-annotate-debug-info=signed-integer-overflow \
@@ -14,9 +14,9 @@ unsigned short si, sj, sk;
// CHECKS-LABEL: define dso_local void @testshortmul(
// CHECKS-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] {
// CHECKS-NEXT: [[ENTRY:.*:]]
-// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]]
+// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]]
// CHECKS-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32, !dbg [[DBG16]]
-// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[TBAA17]]
+// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]]
// CHECKS-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32, !dbg [[DBG21]]
// CHECKS-NEXT: [[TMP2:%.*]] = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[CONV]], i32 [[CONV1]]), !dbg [[DBG22:![0-9]+]], !nosanitize [[META26:![0-9]+]]
// CHECKS-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !dbg [[DBG22]], !nosanitize [[META26]]
@@ -29,16 +29,16 @@ unsigned short si, sj, sk;
// CHECKS: [[CONT]]:
// CHECKS-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !dbg [[DBG22]], !nosanitize [[META26]]
// CHECKS-NEXT: [[CONV2:%.*]] = trunc i32 [[TMP6]] to i16, !dbg [[DBG16]]
-// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA17]]
+// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG28:![0-9]+]], !tbaa [[SHORT_TBAA17]]
// CHECKS-NEXT: ret void, !dbg [[DBG29:![0-9]+]]
//
// CHECKU-LABEL: define dso_local void @testshortmul(
// CHECKU-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] {
// CHECKU-NEXT: [[ENTRY:.*:]]
-// CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]]
-// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[TBAA17]]
+// CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]]
+// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]]
// CHECKU-NEXT: [[MUL:%.*]] = mul i16 [[TMP1]], [[TMP0]], !dbg [[DBG22:![0-9]+]]
-// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[TBAA17]]
+// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[SHORT_TBAA17]]
// CHECKU-NEXT: ret void, !dbg [[DBG24:![0-9]+]]
//
void testshortmul(void) {
@@ -50,7 +50,7 @@ void testshortmul(void) {
// CHECKS: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression())
// CHECKS: [[META1]] = distinct !DIGlobalVariable(name: "sj", scope: [[META2:![0-9]+]], file: [[META7:![0-9]+]], line: 12, type: [[META8:![0-9]+]], isLocal: false, isDefinition: true)
// CHECKS: [[META2]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META3:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META4:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-// CHECKS: [[META3]] = !DIFile(filename: "<stdin>", directory: {{.*}})
+// CHECKS: [[META3]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})
// CHECKS: [[META4]] = !{[[META5:![0-9]+]], [[META0]], [[META9:![0-9]+]]}
// CHECKS: [[META5]] = !DIGlobalVariableExpression(var: [[META6:![0-9]+]], expr: !DIExpression())
// CHECKS: [[META6]] = distinct !DIGlobalVariable(name: "si", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true)
@@ -62,7 +62,7 @@ void testshortmul(void) {
// CHECKS: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]])
// CHECKS: [[META15]] = !{null}
// CHECKS: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]])
-// CHECKS: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// CHECKS: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// CHECKS: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0}
// CHECKS: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
// CHECKS: [[META20]] = !{!"Simple C/C++ TBAA"}
@@ -79,7 +79,7 @@ void testshortmul(void) {
// CHECKU: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression())
// CHECKU: [[META1]] = distinct !DIGlobalVariable(name: "sj", scope: [[META2:![0-9]+]], file: [[META7:![0-9]+]], line: 12, type: [[META8:![0-9]+]], isLocal: false, isDefinition: true)
// CHECKU: [[META2]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META3:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META4:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-// CHECKU: [[META3]] = !DIFile(filename: "<stdin>", directory: {{.*}})
+// CHECKU: [[META3]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})
// CHECKU: [[META4]] = !{[[META5:![0-9]+]], [[META0]], [[META9:![0-9]+]]}
// CHECKU: [[META5]] = !DIGlobalVariableExpression(var: [[META6:![0-9]+]], expr: !DIExpression())
// CHECKU: [[META6]] = distinct !DIGlobalVariable(name: "si", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true)
@@ -91,7 +91,7 @@ void testshortmul(void) {
// CHECKU: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]])
// CHECKU: [[META15]] = !{null}
// CHECKU: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]])
-// CHECKU: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// CHECKU: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// CHECKU: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0}
// CHECKU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
// CHECKU: [[META20]] = !{!"Simple C/C++ TBAA"}
diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c
index 413275d..f619d32 100644
--- a/clang/test/Driver/print-supported-extensions-riscv.c
+++ b/clang/test/Driver/print-supported-extensions-riscv.c
@@ -212,6 +212,7 @@
// CHECK-EMPTY:
// CHECK-NEXT: Experimental extensions
// CHECK-NEXT: p 0.15 'P' ('Base P' (Packed SIMD))
+// CHECK-NEXT: zibi 0.1 'Zibi' (Branch with Immediate)
// CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad)
// CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack)
// CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions)
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 15bdb75..b88aa3c 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
@@ -47,41 +47,43 @@
#define BOOL_TYPE int
typedef unsigned long long uint64_t;
-// CHECK-LABEL: @test___make_mantissa_base8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base8(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4:![0-9]+]]
// CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// CHECK: while.body.i:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// CHECK: [[WHILE_BODY_I]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], %[[IF_THEN_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], %[[IF_THEN_I]] ], [ 0, %[[ENTRY]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_THEN_I]] ], [ [[P]], %[[ENTRY]] ]
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], -8
// CHECK-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP2]], 48
-// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]
-// CHECK: if.then.i:
+// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_THEN_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]
+// CHECK: [[IF_THEN_I]]:
// CHECK-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I3]], 3
// CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64
// CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
// CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]]
// CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
-// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK: _ZL21__make_mantissa_base8PKc.exit:
-// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]:
+// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
-// AMDGCNSPIRV-LABEL: @test___make_mantissa_base8(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]]
-// AMDGCNSPIRV: while.cond.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5:![0-9]+]]
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base8(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: br label %[[WHILE_COND_I:.*]]
+// AMDGCNSPIRV: [[WHILE_COND_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5:![0-9]+]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I]]
-// AMDGCNSPIRV: while.body.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -8
// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3
@@ -91,50 +93,52 @@ typedef unsigned long long uint64_t;
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]]
-// AMDGCNSPIRV: _ZL21__make_mantissa_base8PKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ]
// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
//
extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
return __make_mantissa_base8(p);
}
-// CHECK-LABEL: @test___make_mantissa_base10(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
+// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base10(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// CHECK: while.body.i:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// CHECK: [[WHILE_BODY_I]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], %[[IF_THEN_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], %[[IF_THEN_I]] ], [ 0, %[[ENTRY]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_THEN_I]] ], [ [[P]], %[[ENTRY]] ]
// CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48
// CHECK-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
-// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]
-// CHECK: if.then.i:
+// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_THEN_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]
+// CHECK: [[IF_THEN_I]]:
// CHECK-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I3]], 10
// CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64
// CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
// CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]]
// CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
-// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP10:![0-9]+]]
-// CHECK: _ZL22__make_mantissa_base10PKc.exit:
-// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]:
+// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
-// AMDGCNSPIRV-LABEL: @test___make_mantissa_base10(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]]
-// AMDGCNSPIRV: while.cond.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base10(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: br label %[[WHILE_COND_I:.*]]
+// AMDGCNSPIRV: [[WHILE_COND_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I]]
-// AMDGCNSPIRV: while.body.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I]], 10
@@ -144,220 +148,224 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
-// AMDGCNSPIRV: _ZL22__make_mantissa_base10PKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ]
// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
//
extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) {
return __make_mantissa_base10(p);
}
-// CHECK-LABEL: @test___make_mantissa_base16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
+// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base16(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// CHECK: while.body.i:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], [[IF_END31_I]] ], [ 0, [[ENTRY]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// CHECK: [[WHILE_BODY_I]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], %[[IF_END31_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], %[[IF_END31_I]] ], [ 0, %[[ENTRY]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_END31_I]] ], [ [[P]], %[[ENTRY]] ]
// CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48
// CHECK-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
-// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]]
-// CHECK: if.else.i:
+// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_END31_I]], label %[[IF_ELSE_I:.*]]
+// CHECK: [[IF_ELSE_I]]:
// CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97
// CHECK-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6
-// CHECK-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]]
-// CHECK: if.else17.i:
+// CHECK-NEXT: br i1 [[OR_COND33_I]], label %[[IF_END31_I]], label %[[IF_ELSE17_I:.*]]
+// CHECK: [[IF_ELSE17_I]]:
// CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -65
// CHECK-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6
-// CHECK-NEXT: br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
-// CHECK: if.end31.i:
-// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ]
+// CHECK-NEXT: br i1 [[OR_COND34_I]], label %[[IF_END31_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
+// CHECK: [[IF_END31_I]]:
+// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I]] ], [ -87, %[[IF_ELSE_I]] ], [ -55, %[[IF_ELSE17_I]] ]
// CHECK-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4
// CHECK-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP1]] to i64
// CHECK-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
// CHECK-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]]
// CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
-// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]]
-// CHECK: _ZL22__make_mantissa_base16PKc.exit:
-// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]:
+// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
-// AMDGCNSPIRV-LABEL: @test___make_mantissa_base16(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base16(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// AMDGCNSPIRV: while.body.i:
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], [[IF_END31_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], %[[IF_END31_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], %[[IF_END31_I]] ], [ 0, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[IF_END31_I]] ], [ [[P]], %[[ENTRY]] ]
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]]
-// AMDGCNSPIRV: if.else.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[IF_END31_I]], label %[[IF_ELSE_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE_I]]:
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97
// AMDGCNSPIRV-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]]
-// AMDGCNSPIRV: if.else17.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label %[[IF_END31_I]], label %[[IF_ELSE17_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE17_I]]:
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -65
// AMDGCNSPIRV-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
-// AMDGCNSPIRV: if.end31.i:
-// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label %[[IF_END31_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
+// AMDGCNSPIRV: [[IF_END31_I]]:
+// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I]] ], [ -87, %[[IF_ELSE_I]] ], [ -55, %[[IF_ELSE17_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4
// AMDGCNSPIRV-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP1]] to i64
// AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
// AMDGCNSPIRV-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I2]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]]
-// AMDGCNSPIRV: _ZL22__make_mantissa_base16PKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ]
// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
//
extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
return __make_mantissa_base16(p);
}
-// CHECK-LABEL: @test___make_mantissa(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
+// CHECK-LABEL: define dso_local i64 @test___make_mantissa(
+// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I_PREHEADER:%.*]]
-// CHECK: while.cond.i14.i.preheader:
-// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: br i1 [[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I_PREHEADER:.*]]
+// CHECK: [[WHILE_COND_I14_I_PREHEADER]]:
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I17_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I17_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I:%.*]]
-// CHECK: if.then.i:
+// CHECK-NEXT: br i1 [[CMP_NOT_I17_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I:.*]]
+// CHECK: [[IF_THEN_I]]:
// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_PREHEADER:%.*]] [
-// CHECK-NEXT: i8 120, label [[IF_THEN5_I:%.*]]
-// CHECK-NEXT: i8 88, label [[IF_THEN5_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// CHECK-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_PREHEADER:.*]] [
+// CHECK-NEXT: i8 120, label %[[IF_THEN5_I:.*]]
+// CHECK-NEXT: i8 88, label %[[IF_THEN5_I]]
// CHECK-NEXT: ]
-// CHECK: while.cond.i.i.preheader:
-// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK: [[WHILE_COND_I_I_PREHEADER]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I_I14]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I:%.*]]
-// CHECK: if.then5.i:
-// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: br i1 [[CMP_NOT_I_I14]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I:.*]]
+// CHECK: [[IF_THEN5_I]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I30_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I30_I9]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I:%.*]]
-// CHECK: while.body.i31.i:
-// CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I]] ]
-// CHECK-NEXT: [[__R_0_I29_I11:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I28_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I30_I9]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I:.*]]
+// CHECK: [[WHILE_BODY_I31_I]]:
+// CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I]] ]
+// CHECK-NEXT: [[__R_0_I29_I11:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], %[[IF_END31_I_I]] ], [ 0, %[[IF_THEN5_I]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I28_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I:%.*]], %[[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN5_I]] ]
// CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// CHECK-NEXT: [[OR_COND_I32_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// CHECK-NEXT: br i1 [[OR_COND_I32_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]]
-// CHECK: if.else.i.i:
+// CHECK-NEXT: br i1 [[OR_COND_I32_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE_I_I:.*]]
+// CHECK: [[IF_ELSE_I_I]]:
// CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// CHECK-NEXT: [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// CHECK-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]]
-// CHECK: if.else17.i.i:
+// CHECK-NEXT: br i1 [[OR_COND33_I_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE17_I_I:.*]]
+// CHECK: [[IF_ELSE17_I_I]]:
// CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// CHECK-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// CHECK-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
-// CHECK: if.end31.i.i:
-// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ]
+// CHECK-NEXT: br i1 [[OR_COND34_I_I]], label %[[IF_END31_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]]
+// CHECK: [[IF_END31_I_I]]:
+// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I]] ], [ -87, %[[IF_ELSE_I_I]] ], [ -55, %[[IF_ELSE17_I_I]] ]
// CHECK-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I29_I11]], 4
// CHECK-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// CHECK-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]]
// CHECK-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
// CHECK-NEXT: [[INCDEC_PTR_I34_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I10]], i64 1
-// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I30_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]]
-// CHECK: while.body.i.i:
-// CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_PREHEADER]] ]
-// CHECK-NEXT: [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_COND_I_I_PREHEADER]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], [[IF_THEN_I_I]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]]
+// CHECK: [[WHILE_BODY_I_I]]:
+// CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT: [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[IF_THEN_I_I]] ], [ [[INCDEC_PTR_I]], %[[WHILE_COND_I_I_PREHEADER]] ]
// CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// CHECK-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// CHECK-NEXT: br i1 [[OR_COND_I_I]], label [[IF_THEN_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
-// CHECK: if.then.i.i:
+// CHECK-NEXT: br i1 [[OR_COND_I_I]], label %[[IF_THEN_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]]
+// CHECK: [[IF_THEN_I_I]]:
// CHECK-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I16]], 3
// CHECK-NEXT: [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// CHECK-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48
// CHECK-NEXT: [[SUB_I_I]] = add i64 [[ADD_I_I]], [[CONV5_I_I]]
// CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I15]], i64 1
-// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]]
-// CHECK: while.body.i18.i:
-// CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_PREHEADER]] ]
-// CHECK-NEXT: [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ]
-// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I:%.*]], [[IF_THEN_I21_I]] ], [ [[P]], [[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]]
+// CHECK: [[WHILE_BODY_I18_I]]:
+// CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT: [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I:%.*]], %[[IF_THEN_I21_I]] ], [ [[P]], %[[WHILE_COND_I14_I_PREHEADER]] ]
// CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// CHECK-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// CHECK-NEXT: br i1 [[OR_COND_I19_I]], label [[IF_THEN_I21_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
-// CHECK: if.then.i21.i:
+// CHECK-NEXT: br i1 [[OR_COND_I19_I]], label %[[IF_THEN_I21_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]]
+// CHECK: [[IF_THEN_I21_I]]:
// CHECK-NEXT: [[MUL_I22_I:%.*]] = mul i64 [[__R_0_I16_I7]], 10
// CHECK-NEXT: [[CONV5_I23_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// CHECK-NEXT: [[ADD_I24_I:%.*]] = add i64 [[MUL_I22_I]], -48
// CHECK-NEXT: [[SUB_I25_I]] = add i64 [[ADD_I24_I]], [[CONV5_I23_I]]
// CHECK-NEXT: [[INCDEC_PTR_I26_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I6]], i64 1
-// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[CHAR_TBAA4]]
// CHECK-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// CHECK-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]]
-// CHECK: _ZL15__make_mantissaPKc.exit:
-// CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_BODY_I18_I]] ]
+// CHECK-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]]
+// CHECK: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]:
+// CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_0_I]]
//
-// AMDGCNSPIRV-LABEL: @test___make_mantissa(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]]
-// AMDGCNSPIRV: if.then.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I:.*]]
+// AMDGCNSPIRV: [[IF_THEN_I]]:
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I:%.*]]
-// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I]]
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]]
+// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I:.*]] [
+// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I:.*]]
+// AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then5.i:
-// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV: [[IF_THEN5_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I5:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]]
-// AMDGCNSPIRV: while.body.i32.i:
-// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I30_I7:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ]
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I32_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], %[[IF_END31_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I30_I7:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], %[[IF_END31_I_I]] ], [ 0, %[[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I:%.*]], %[[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN5_I]] ]
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP4]], 10
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]]
-// AMDGCNSPIRV: if.else.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97
// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]]
-// AMDGCNSPIRV: if.else17.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE17_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE17_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65
// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
-// AMDGCNSPIRV: if.end31.i.i:
-// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label %[[IF_END31_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]]
+// AMDGCNSPIRV: [[IF_END31_I_I]]:
+// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I]] ], [ -87, %[[IF_ELSE_I_I]] ], [ -55, %[[IF_ELSE17_I_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I7]], 4
// AMDGCNSPIRV-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
// AMDGCNSPIRV-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]]
// AMDGCNSPIRV-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I6]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP7]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]]
-// AMDGCNSPIRV: while.cond.i.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], [[WHILE_BODY_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[WHILE_BODY_I_I]] ], [ 0, [[IF_THEN_I]] ]
-// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV: [[WHILE_COND_I_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], %[[WHILE_BODY_I_I:.*]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], %[[WHILE_BODY_I_I]] ], [ 0, %[[IF_THEN_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]]
-// AMDGCNSPIRV: while.body.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8
// AMDGCNSPIRV-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
// AMDGCNSPIRV-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3
@@ -367,14 +375,14 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], i64 [[__TAGP_ADDR_1_I_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I_I]] = select i1 [[OR_COND_I_I]], i64 [[SUB_I_I]], i64 [[__R_0_I_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label [[WHILE_COND_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]]
-// AMDGCNSPIRV: while.cond.i14.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], [[WHILE_BODY_I18_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], [[WHILE_BODY_I18_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label %[[WHILE_COND_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]]
+// AMDGCNSPIRV: [[WHILE_COND_I14_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], %[[WHILE_BODY_I18_I:.*]] ], [ [[P]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], %[[WHILE_BODY_I18_I]] ], [ 0, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP10]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]]
-// AMDGCNSPIRV: while.body.i18.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I18_I]]:
// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP11]], 10
// AMDGCNSPIRV-NEXT: [[MUL_I20_I:%.*]] = mul i64 [[__R_0_I16_I]], 10
@@ -384,225 +392,261 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], i64 [[__TAGP_ADDR_1_I25_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]]
-// AMDGCNSPIRV: _ZL15__make_mantissaPKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label %[[WHILE_COND_I14_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]]
+// AMDGCNSPIRV: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], %[[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], %[[WHILE_COND_I14_I]] ]
// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_0_I]]
//
// Device-side wrapper exercising __make_mantissa() from the HIP math header.
// NOTE(review): the CHECK lines above appear auto-generated (update_cc_test_checks
// style) — regenerate them rather than hand-editing; do not restyle this body,
// since any token change can perturb the generated IR the checks pin down.
extern "C" __device__ uint64_t test___make_mantissa(const char *p) {
  return __make_mantissa(p);
}
-// CHECK-LABEL: @test_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
+// CHECK-LABEL: define dso_local noundef range(i32 0, -2147483648) i32 @test_abs(
+// CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X]], i1 true)
// CHECK-NEXT: ret i32 [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_abs(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, -2147483648) i32 @test_abs(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X]], i1 true)
// AMDGCNSPIRV-NEXT: ret i32 [[TMP0]]
//
// Device-side wrapper for abs(int); the CHECK lines above assert it lowers to
// a tail call of @llvm.abs.i32 with the poison-on-INT_MIN flag (i1 true).
extern "C" __device__ int test_abs(int x) {
  return abs(x);
}
-// CHECK-LABEL: @test_labs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
+// CHECK-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @test_labs(
+// CHECK-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X]], i1 true)
// CHECK-NEXT: ret i64 [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_labs(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i64 0, -9223372036854775808) i64 @test_labs(
+// AMDGCNSPIRV-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X]], i1 true)
// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]]
//
// Device-side wrapper for labs(long); the CHECK lines above assert it lowers
// to @llvm.abs.i64 (long is 64-bit on these device targets).
extern "C" __device__ long test_labs(long x) {
  return labs(x);
}
-// CHECK-LABEL: @test_llabs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
+// CHECK-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @test_llabs(
+// CHECK-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X]], i1 true)
// CHECK-NEXT: ret i64 [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_llabs(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i64 0, -9223372036854775808) i64 @test_llabs(
+// AMDGCNSPIRV-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X]], i1 true)
// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]]
//
// Device-side wrapper for llabs(); the CHECK lines above assert it lowers to
// @llvm.abs.i64.
// Fix(review): the parameter was declared `long` while llabs() takes
// `long long`. Widened to `long long` to match the function under test.
// This is ABI-compatible here: extern "C" (no mangling) and both types
// lower to i64 on these targets, so the generated CHECK lines are unchanged.
extern "C" __device__ long long test_llabs(long long x) {
  return llabs(x);
}
-// DEFAULT-LABEL: @test_acosf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// DEFAULT-LABEL: define dso_local noundef float @test_acosf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_acosf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acosf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14:[0-9]+]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_acosf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// APPROX-LABEL: define dso_local noundef float @test_acosf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_acosf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// NCRDIV-LABEL: define dso_local noundef float @test_acosf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_acosf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acosf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR12:[0-9]+]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
// Device-side wrapper for acosf(); the CHECK lines above assert it lowers to a
// tail call of the OCML runtime entry @__ocml_acos_f32 (with nnan/ninf and
// nofpclass(nan inf) under the FINITEONLY configuration).
extern "C" __device__ float test_acosf(float x) {
  return acosf(x);
}
-// DEFAULT-LABEL: @test_acos(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_acos(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_acos(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_acos(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_acos(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_acos(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_acos(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_acos(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_acos(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acos(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
// Device-side wrapper for acos(double); lowers to @__ocml_acos_f64 per the
// CHECK lines above. Do not restyle the body — the auto-generated checks
// depend on the exact IR clang emits for it.
extern "C" __device__ double test_acos(double x) {
  return acos(x);
}
-// DEFAULT-LABEL: @test_acoshf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
+// DEFAULT-LABEL: define dso_local noundef float @test_acoshf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_acoshf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15:[0-9]+]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acoshf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15:[0-9]+]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_acoshf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
+// APPROX-LABEL: define dso_local noundef float @test_acoshf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_acoshf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
+// NCRDIV-LABEL: define dso_local noundef float @test_acoshf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_acoshf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acoshf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR13:[0-9]+]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
// Device-side wrapper for acoshf(); lowers to @__ocml_acosh_f32 per the CHECK
// lines above (note the distinct attribute group #[[ATTR15]]/#[[ATTR13]]
// versus the acos/asin wrappers).
extern "C" __device__ float test_acoshf(float x) {
  return acoshf(x);
}
-// DEFAULT-LABEL: @test_acosh(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_acosh(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_acosh(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_acosh(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_acosh(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_acosh(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_acosh(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_acosh(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_acosh(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acosh(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
// Device-side wrapper for acosh(double); lowers to @__ocml_acosh_f64 per the
// CHECK lines above.
extern "C" __device__ double test_acosh(double x) {
  return acosh(x);
}
-// DEFAULT-LABEL: @test_asinf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_asinf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_asinf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_asinf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_asinf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_asinf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_asinf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_asinf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_asinf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
// Device-side wrapper for asinf(); lowers to @__ocml_asin_f32 per the CHECK
// lines above.
extern "C" __device__ float test_asinf(float x) {
  return asinf(x);
}
-// DEFAULT-LABEL: @test_asin(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_asin(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_asin(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_asin(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_asin(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_asin(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_asin(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_asin(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_asin(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asin(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_asin(double x) {
@@ -610,1551 +654,1816 @@ extern "C" __device__ double test_asin(double x) {
return asin(x);
}
-// DEFAULT-LABEL: @test_asinhf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_asinhf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_asinhf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinhf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_asinhf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_asinhf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_asinhf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_asinhf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_asinhf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_asinhf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
// Device-side wrapper for asinhf(); lowers to @__ocml_asinh_f32 per the CHECK
// lines above.
extern "C" __device__ float test_asinhf(float x) {
  return asinhf(x);
}
-// DEFAULT-LABEL: @test_asinh(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_asinh(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_asinh(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_asinh(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_asinh(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_asinh(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_asinh(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_asinh(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_asinh(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asinh(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
// Device-side wrapper for asinh(double); lowers to @__ocml_asinh_f64 per the
// CHECK lines above.
extern "C" __device__ double test_asinh(double x) {
  return asinh(x);
}
-// DEFAULT-LABEL: @test_atan2f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_atan2f(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atan2f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atan2f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_atan2f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_atan2f(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atan2f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_atan2f(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atan2f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atan2f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_atan2f(float x, float y) {
return atan2f(x, y);
}
-// DEFAULT-LABEL: @test_atan2(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_atan2(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atan2(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan2(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_atan2(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_atan2(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atan2(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_atan2(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atan2(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan2(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_atan2(double x, double y) {
return atan2(x, y);
}
-// DEFAULT-LABEL: @test_atanf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_atanf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atanf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_atanf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_atanf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atanf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_atanf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atanf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_atanf(float x) {
return atanf(x);
}
-// DEFAULT-LABEL: @test_atan(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_atan(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atan(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_atan(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_atan(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atan(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_atan(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atan(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_atan(double x) {
return atan(x);
}
-// DEFAULT-LABEL: @test_atanhf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_atanhf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atanhf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanhf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_atanhf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_atanhf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atanhf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_atanhf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atanhf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanhf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_atanhf(float x) {
return atanhf(x);
}
-// DEFAULT-LABEL: @test_atanh(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_atanh(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_atanh(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atanh(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_atanh(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_atanh(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_atanh(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_atanh(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_atanh(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atanh(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_atanh(double x) {
return atanh(x);
}
-// DEFAULT-LABEL: @test_cbrtf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_cbrtf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cbrtf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cbrtf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_cbrtf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_cbrtf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cbrtf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_cbrtf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cbrtf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cbrtf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_cbrtf(float x) {
return cbrtf(x);
}
-// DEFAULT-LABEL: @test_cbrt(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cbrt(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cbrt(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cbrt(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cbrt(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_cbrt(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cbrt(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cbrt(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cbrt(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cbrt(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cbrt(double x) {
return cbrt(x);
}
-// DEFAULT-LABEL: @test_ceilf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_ceilf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_ceilf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ceil.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_ceilf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ceil.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_ceilf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_ceilf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_ceilf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_ceilf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_ceilf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_ceilf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_ceilf(float x) {
return ceilf(x);
}
-// DEFAULT-LABEL: @test_ceil(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_ceil(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_ceil(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ceil.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_ceil(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ceil.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_ceil(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_ceil(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_ceil(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_ceil(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_ceil(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_ceil(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_ceil(double x) {
return ceil(x);
}
-// DEFAULT-LABEL: @test_copysignf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_copysignf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_copysignf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.copysign.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_copysignf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.copysign.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_copysignf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_copysignf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_copysignf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_copysignf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_copysignf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_copysignf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X]], float [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_copysignf(float x, float y) {
return copysignf(x, y);
}
-// DEFAULT-LABEL: @test_copysign(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_copysign(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_copysign(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.copysign.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_copysign(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.copysign.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_copysign(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_copysign(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_copysign(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_copysign(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_copysign(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_copysign(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X]], double [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_copysign(double x, double y) {
return copysign(x, y);
}
-// DEFAULT-LABEL: @test_cosf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
+// DEFAULT-LABEL: define dso_local noundef float @test_cosf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cosf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16:[0-9]+]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cosf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16:[0-9]+]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_cosf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
+// APPROX-LABEL: define dso_local noundef float @test_cosf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]]
// APPROX-NEXT: ret float [[CALL_I1]]
//
-// NCRDIV-LABEL: @test_cosf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
+// NCRDIV-LABEL: define dso_local noundef float @test_cosf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cosf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cosf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_cosf(float x) {
return cosf(x);
}
-// DEFAULT-LABEL: @test_cos(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cos(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cos(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cos(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cos(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_cos(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cos(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cos(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cos(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cos(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cos(double x) {
return cos(x);
}
-// DEFAULT-LABEL: @test_coshf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_coshf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_coshf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_coshf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_coshf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_coshf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_coshf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_coshf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_coshf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_coshf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_coshf(float x) {
return coshf(x);
}
-// DEFAULT-LABEL: @test_cosh(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cosh(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cosh(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cosh(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cosh(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_cosh(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cosh(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cosh(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cosh(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cosh(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cosh(double x) {
return cosh(x);
}
-// DEFAULT-LABEL: @test_cospif(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_cospif(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cospif(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cospif(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_cospif(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_cospif(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cospif(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_cospif(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cospif(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cospif(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_cospif(float x) {
return cospif(x);
}
-// DEFAULT-LABEL: @test_cospi(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cospi(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cospi(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cospi(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cospi(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_cospi(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cospi(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cospi(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cospi(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cospi(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cospi(double x) {
return cospi(x);
}
-// DEFAULT-LABEL: @test_cyl_bessel_i0f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i0f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cyl_bessel_i0f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i0f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_cyl_bessel_i0f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i0f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cyl_bessel_i0f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_cyl_bessel_i0f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i0f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_cyl_bessel_i0f(float x) {
return cyl_bessel_i0f(x);
}
-// DEFAULT-LABEL: @test_cyl_bessel_i0(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i0(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cyl_bessel_i0(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i0(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cyl_bessel_i0(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i0(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cyl_bessel_i0(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i0(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i0(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cyl_bessel_i0(double x) {
return cyl_bessel_i0(x);
}
-// DEFAULT-LABEL: @test_cyl_bessel_i1f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i1f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cyl_bessel_i1f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i1f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_cyl_bessel_i1f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i1f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cyl_bessel_i1f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_cyl_bessel_i1f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i1f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_cyl_bessel_i1f(float x) {
return cyl_bessel_i1f(x);
}
-// DEFAULT-LABEL: @test_cyl_bessel_i1(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i1(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_cyl_bessel_i1(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i1(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_cyl_bessel_i1(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i1(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_cyl_bessel_i1(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i1(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i1(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_cyl_bessel_i1(double x) {
return cyl_bessel_i1(x);
}
-// DEFAULT-LABEL: @test_erfcf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_erfcf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_erfcf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfcf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_erfcf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_erfcf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_erfcf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_erfcf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_erfcf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfcf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_erfcf(float x) {
return erfcf(x);
}
-// DEFAULT-LABEL: @test_erfc(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_erfc(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_erfc(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfc(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_erfc(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_erfc(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_erfc(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_erfc(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_erfc(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfc(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_erfc(double x) {
return erfc(x);
}
-// DEFAULT-LABEL: @test_erfinvf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_erfinvf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_erfinvf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfinvf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_erfinvf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_erfinvf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_erfinvf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_erfinvf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_erfinvf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfinvf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_erfinvf(float x) {
return erfinvf(x);
}
-// DEFAULT-LABEL: @test_erfinv(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_erfinv(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_erfinv(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfinv(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_erfinv(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_erfinv(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_erfinv(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_erfinv(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_erfinv(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfinv(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_erfinv(double x) {
return erfinv(x);
}
-// DEFAULT-LABEL: @test_exp10f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_exp10f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_exp10f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp10.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_exp10f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp10.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_exp10f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_exp10f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_exp10f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_exp10f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_exp10f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp10.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_exp10f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp10.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_exp10f(float x) {
return exp10f(x);
}
-// DEFAULT-LABEL: @test_exp10(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_exp10(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_exp10(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp10(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_exp10(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_exp10(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_exp10(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_exp10(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_exp10(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp10(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_exp10(double x) {
return exp10(x);
}
-// DEFAULT-LABEL: @test_exp2f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_exp2f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_exp2f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp2.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_exp2f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp2.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_exp2f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_exp2f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_exp2f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_exp2f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_exp2f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_exp2f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_exp2f(float x) {
return exp2f(x);
}
-// DEFAULT-LABEL: @test_exp2(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_exp2(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_exp2(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp2(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_exp2(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_exp2(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_exp2(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_exp2(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_exp2(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp2(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_exp2(double x) {
return exp2(x);
}
-// DEFAULT-LABEL: @test_expf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_expf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_expf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_expf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_expf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_expf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_expf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_expf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_expf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_expf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_expf(float x) {
return expf(x);
}
-// DEFAULT-LABEL: @test_exp(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_exp(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_exp(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_exp(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_exp(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_exp(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_exp(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_exp(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_exp(double x) {
return exp(x);
}
-// DEFAULT-LABEL: @test_expm1f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_expm1f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_expm1f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_expm1f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_expm1f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_expm1f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_expm1f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_expm1f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_expm1f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_expm1f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_expm1f(float x) {
return expm1f(x);
}
-// DEFAULT-LABEL: @test_expm1(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_expm1(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_expm1(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_expm1(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_expm1(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_expm1(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_expm1(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_expm1(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_expm1(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_expm1(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_expm1(double x) {
return expm1(x);
}
-// DEFAULT-LABEL: @test_fabsf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_fabsf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fabsf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fabs.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fabsf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fabs.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_fabsf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_fabsf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_fabsf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_fabsf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fabsf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fabsf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_fabsf(float x) {
return fabsf(x);
}
-// DEFAULT-LABEL: @test_fabs(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_fabs(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fabs(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fabs.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fabs(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fabs.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_fabs(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_fabs(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_fabs(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_fabs(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fabs(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fabs(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_fabs(double x) {
return fabs(x);
}
-// DEFAULT-LABEL: @test_fdimf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_fdimf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_fdimf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fdimf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_fdimf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_fdimf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_fdimf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_fdimf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_fdimf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdimf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_fdimf(float x, float y) {
return fdimf(x, y);
}
-// DEFAULT-LABEL: @test_fdim(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_fdim(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_fdim(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fdim(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_fdim(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_fdim(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_fdim(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_fdim(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_fdim(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fdim(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_fdim(double x, double y) {
return fdim(x, y);
}
-// DEFAULT-LABEL: @test_fdividef(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test_fdividef(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// DEFAULT-NEXT: ret float [[DIV_I]]
//
-// FINITEONLY-LABEL: @test_fdividef(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_fdividef(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X]], [[Y]]
// FINITEONLY-NEXT: ret float [[DIV_I]]
//
-// APPROX-LABEL: @test_fdividef(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test_fdividef(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// APPROX-NEXT: ret float [[DIV_I]]
//
-// NCRDIV-LABEL: @test_fdividef(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]], !fpmath [[META12:![0-9]+]]
+// NCRDIV-LABEL: define dso_local noundef float @test_fdividef(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12:![0-9]+]]
// NCRDIV-NEXT: ret float [[DIV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_fdividef(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdividef(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret float [[DIV_I]]
//
extern "C" __device__ float test_fdividef(float x, float y) {
return fdividef(x, y);
}
-// DEFAULT-LABEL: @test_floorf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_floorf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_floorf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.floor.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_floorf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.floor.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_floorf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_floorf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_floorf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_floorf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_floorf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_floorf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_floorf(float x) {
return floorf(x);
}
-// DEFAULT-LABEL: @test_floor(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_floor(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_floor(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.floor.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_floor(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.floor.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_floor(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_floor(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_floor(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_floor(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_floor(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_floor(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_floor(double x) {
return floor(x);
}
-// DEFAULT-LABEL: @test_fmaf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_fmaf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fmaf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]], float nofpclass(nan inf) [[Z:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmaf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]], float nofpclass(nan inf) [[Z]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_fmaf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_fmaf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_fmaf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_fmaf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fmaf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmaf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_fmaf(float x, float y, float z) {
return fmaf(x, y, z);
}
-// DEFAULT-LABEL: @test_fma(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_fma(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fma(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fma(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_fma(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_fma(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_fma(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_fma(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fma(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fma(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_fma(double x, double y, double z) {
return fma(x, y, z);
}
-// DEFAULT-LABEL: @test_fma_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_fma_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fma_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fma_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_fma_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_fma_rn(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_fma_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_fma_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fma_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fma_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_fma_rn(double x, double y, double z) {
return __fma_rn(x, y, z);
}
-// DEFAULT-LABEL: @test_fmaxf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_fmaxf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fmaxf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmaxf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_fmaxf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_fmaxf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_fmaxf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_fmaxf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fmaxf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmaxf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X]], float [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_fmaxf(float x, float y) {
return fmaxf(x, y);
}
-// DEFAULT-LABEL: @test_fmax(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_fmax(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fmax(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmax(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_fmax(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_fmax(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_fmax(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_fmax(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fmax(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmax(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X]], double [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_fmax(double x, double y) {
return fmax(x, y);
}
-// DEFAULT-LABEL: @test_fminf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_fminf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fminf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fminf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_fminf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_fminf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_fminf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_fminf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fminf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fminf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X]], float [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_fminf(float x, float y) {
return fminf(x, y);
}
-// DEFAULT-LABEL: @test_fmin(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_fmin(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_fmin(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmin(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_fmin(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_fmin(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_fmin(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_fmin(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_fmin(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmin(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X]], double [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_fmin(double x, double y) {
return fmin(x, y);
}
-// DEFAULT-LABEL: @test_fmodf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_fmodf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_fmodf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmodf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_fmodf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_fmodf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_fmodf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_fmodf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_fmodf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmodf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_fmodf(float x, float y) {
return fmodf(x, y);
}
-// DEFAULT-LABEL: @test_fmod(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_fmod(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_fmod(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmod(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_fmod(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_fmod(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_fmod(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_fmod(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_fmod(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmod(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_fmod(double x, double y) {
return fmod(x, y);
}
-// DEFAULT-LABEL: @test_frexpf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_frexpf(
+// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
-// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]]
+// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
// DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0
// DEFAULT-NEXT: ret float [[TMP2]]
//
-// FINITEONLY-LABEL: @test_frexpf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_frexpf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
-// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]]
+// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
// FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0
// FINITEONLY-NEXT: ret float [[TMP2]]
//
-// APPROX-LABEL: @test_frexpf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_frexpf(
+// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
-// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]]
+// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
// APPROX-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0
// APPROX-NEXT: ret float [[TMP2]]
//
-// NCRDIV-LABEL: @test_frexpf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_frexpf(
+// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
-// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]]
+// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
// NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0
// NCRDIV-NEXT: ret float [[TMP2]]
//
-// AMDGCNSPIRV-LABEL: @test_frexpf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_frexpf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR7:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
-// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0
// AMDGCNSPIRV-NEXT: ret float [[TMP2]]
//
@@ -2162,43 +2471,48 @@ extern "C" __device__ float test_frexpf(float x, int* y) {
return frexpf(x, y);
}
-// DEFAULT-LABEL: @test_frexp(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_frexp(
+// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
-// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]]
+// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0
// DEFAULT-NEXT: ret double [[TMP2]]
//
-// FINITEONLY-LABEL: @test_frexp(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_frexp(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
-// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]]
+// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0
// FINITEONLY-NEXT: ret double [[TMP2]]
//
-// APPROX-LABEL: @test_frexp(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_frexp(
+// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
-// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]]
+// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]]
// APPROX-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0
// APPROX-NEXT: ret double [[TMP2]]
//
-// NCRDIV-LABEL: @test_frexp(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_frexp(
+// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
-// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA13]]
+// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13]]
// NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0
// NCRDIV-NEXT: ret double [[TMP2]]
//
-// AMDGCNSPIRV-LABEL: @test_frexp(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_frexp(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR7]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
-// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13]]
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0
// AMDGCNSPIRV-NEXT: ret double [[TMP2]]
//
@@ -2206,150 +2520,175 @@ extern "C" __device__ double test_frexp(double x, int* y) {
return frexp(x, y);
}
-// DEFAULT-LABEL: @test_hypotf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_hypotf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_hypotf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_hypotf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_hypotf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_hypotf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_hypotf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_hypotf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_hypotf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_hypotf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_hypotf(float x, float y) {
return hypotf(x, y);
}
-// DEFAULT-LABEL: @test_hypot(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_hypot(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_hypot(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_hypot(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_hypot(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_hypot(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_hypot(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_hypot(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_hypot(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_hypot(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_hypot(double x, double y) {
return hypot(x, y);
}
-// DEFAULT-LABEL: @test_ilogbf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef i32 @test_ilogbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret i32 [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_ilogbf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret i32 [[CALL_I]]
//
-// APPROX-LABEL: @test_ilogbf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef i32 @test_ilogbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret i32 [[CALL_I]]
//
-// NCRDIV-LABEL: @test_ilogbf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef i32 @test_ilogbf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret i32 [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_ilogbf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogbf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]]
//
extern "C" __device__ int test_ilogbf(float x) {
return ilogbf(x);
}
-// DEFAULT-LABEL: @test_ilogb(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef i32 @test_ilogb(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret i32 [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_ilogb(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogb(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret i32 [[CALL_I]]
//
-// APPROX-LABEL: @test_ilogb(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef i32 @test_ilogb(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret i32 [[CALL_I]]
//
-// NCRDIV-LABEL: @test_ilogb(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef i32 @test_ilogb(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret i32 [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_ilogb(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogb(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]]
//
extern "C" __device__ int test_ilogb(double x) {
return ilogb(x);
}
-// DEFAULT-LABEL: @test___finitef(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___finitef(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 1
//
-// APPROX-LABEL: @test___finitef(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___finitef(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___finitef(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
@@ -2358,34 +2697,39 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
return __finitef(x);
}
-// DEFAULT-LABEL: @test___finite(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___finite(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 1
//
-// APPROX-LABEL: @test___finite(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___finite(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___finite(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
@@ -2394,34 +2738,39 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
return __finite(x);
}
-// DEFAULT-LABEL: @test___isinff(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___isinff(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: @test___isinff(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___isinff(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___isinff(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
@@ -2430,34 +2779,39 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
return __isinff(x);
}
-// DEFAULT-LABEL: @test___isinf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// DEFAULT-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___isinf(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: @test___isinf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// APPROX-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___isinf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
// NCRDIV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___isinf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
@@ -2466,31 +2820,36 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) {
return __isinf(x);
}
-// DEFAULT-LABEL: @test___isnanf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___isnanf(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: @test___isnanf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___isnanf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___isnanf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
//
@@ -2498,31 +2857,36 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) {
return __isnanf(x);
}
-// DEFAULT-LABEL: @test___isnan(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// DEFAULT-NEXT: ret i32 [[CONV]]
//
-// FINITEONLY-LABEL: @test___isnan(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: @test___isnan(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: @test___isnan(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___isnan(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
//
@@ -2530,143 +2894,164 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) {
return __isnan(x);
}
-// DEFAULT-LABEL: @test_j0f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_j0f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_j0f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j0f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_j0f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_j0f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_j0f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_j0f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_j0f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_j0f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_j0f(float x) {
return j0f(x);
}
-// DEFAULT-LABEL: @test_j0(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_j0(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_j0(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j0(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_j0(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_j0(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_j0(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_j0(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_j0(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j0(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_j0(double x) {
return j0(x);
}
-// DEFAULT-LABEL: @test_j1f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_j1f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_j1f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j1f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_j1f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_j1f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_j1f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_j1f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_j1f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_j1f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_j1f(float x) {
return j1f(x);
}
-// DEFAULT-LABEL: @test_j1(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_j1(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_j1(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j1(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_j1(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_j1(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_j1(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_j1(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_j1(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j1(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_j1(double x) {
return j1(x);
}
-// DEFAULT-LABEL: @test_jnf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// DEFAULT-LABEL: define dso_local float @test_jnf(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// DEFAULT-NEXT: ]
-// DEFAULT: if.then.i:
-// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]]
-// DEFAULT: if.then2.i:
+// DEFAULT: [[IF_THEN_I]]:
+// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]]
+// DEFAULT: [[IF_THEN2_I]]:
// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL3JNFIF_EXIT]]
-// DEFAULT: if.end4.i:
+// DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT]]
+// DEFAULT: [[IF_END4_I]]:
// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
-// DEFAULT: for.body.i:
-// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]]
+// DEFAULT: [[FOR_BODY_I]]:
+// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -2674,32 +3059,33 @@ extern "C" __device__ double test_j1(double x) {
// DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
-// DEFAULT: _ZL3jnfif.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
+// DEFAULT: [[_ZL3JNFIF_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// DEFAULT-NEXT: ret float [[RETVAL_0_I]]
//
-// FINITEONLY-LABEL: @test_jnf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_jnf(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// FINITEONLY-NEXT: ]
-// FINITEONLY: if.then.i:
-// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]]
-// FINITEONLY: if.then2.i:
+// FINITEONLY: [[IF_THEN_I]]:
+// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]]
+// FINITEONLY: [[IF_THEN2_I]]:
// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL3JNFIF_EXIT]]
-// FINITEONLY: if.end4.i:
+// FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT]]
+// FINITEONLY: [[IF_END4_I]]:
// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
-// FINITEONLY: for.body.i:
-// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]]
+// FINITEONLY: [[FOR_BODY_I]]:
+// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
@@ -2707,32 +3093,33 @@ extern "C" __device__ double test_j1(double x) {
// FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
// FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
-// FINITEONLY: _ZL3jnfif.exit:
-// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
+// FINITEONLY: [[_ZL3JNFIF_EXIT]]:
+// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// FINITEONLY-NEXT: ret float [[RETVAL_0_I]]
//
-// APPROX-LABEL: @test_jnf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// APPROX-LABEL: define dso_local float @test_jnf(
+// APPROX-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// APPROX-NEXT: ]
-// APPROX: if.then.i:
-// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]]
-// APPROX: if.then2.i:
+// APPROX: [[IF_THEN_I]]:
+// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]]
+// APPROX: [[IF_THEN2_I]]:
// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL3JNFIF_EXIT]]
-// APPROX: if.end4.i:
+// APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT]]
+// APPROX: [[IF_END4_I]]:
// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
-// APPROX: for.body.i:
-// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]]
+// APPROX: [[FOR_BODY_I]]:
+// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -2740,32 +3127,33 @@ extern "C" __device__ double test_j1(double x) {
// APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
-// APPROX: _ZL3jnfif.exit:
-// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
+// APPROX: [[_ZL3JNFIF_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// APPROX-NEXT: ret float [[RETVAL_0_I]]
//
-// NCRDIV-LABEL: @test_jnf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// NCRDIV-LABEL: define dso_local float @test_jnf(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// NCRDIV-NEXT: ]
-// NCRDIV: if.then.i:
-// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]]
-// NCRDIV: if.then2.i:
+// NCRDIV: [[IF_THEN_I]]:
+// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]]
+// NCRDIV: [[IF_THEN2_I]]:
// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL3JNFIF_EXIT]]
-// NCRDIV: if.end4.i:
+// NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT]]
+// NCRDIV: [[IF_END4_I]]:
// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
-// NCRDIV: for.body.i:
-// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]]
+// NCRDIV: [[FOR_BODY_I]]:
+// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]]
@@ -2773,32 +3161,33 @@ extern "C" __device__ double test_j1(double x) {
// NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
-// NCRDIV: _ZL3jnfif.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
+// NCRDIV: [[_ZL3JNFIF_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// NCRDIV-NEXT: ret float [[RETVAL_0_I]]
//
-// AMDGCNSPIRV-LABEL: @test_jnf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func float @test_jnf(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]]
-// AMDGCNSPIRV: if.then2.i:
+// AMDGCNSPIRV: [[IF_THEN_I]]:
+// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]]
+// AMDGCNSPIRV: [[IF_THEN2_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT]]
-// AMDGCNSPIRV: if.end4.i:
+// AMDGCNSPIRV-NEXT: br label %[[_ZL3JNFIF_EXIT]]
+// AMDGCNSPIRV: [[IF_END4_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
-// AMDGCNSPIRV: for.body.i:
-// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]]
+// AMDGCNSPIRV: [[FOR_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -2806,36 +3195,37 @@ extern "C" __device__ double test_j1(double x) {
// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
-// AMDGCNSPIRV: _ZL3jnfif.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL3JNFIF_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]]
//
extern "C" __device__ float test_jnf(int x, float y) {
return jnf(x, y);
}
-// DEFAULT-LABEL: @test_jn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// DEFAULT-LABEL: define dso_local double @test_jn(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// DEFAULT-NEXT: ]
-// DEFAULT: if.then.i:
-// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL2JNID_EXIT:%.*]]
-// DEFAULT: if.then2.i:
+// DEFAULT: [[IF_THEN_I]]:
+// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT:.*]]
+// DEFAULT: [[IF_THEN2_I]]:
// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL2JNID_EXIT]]
-// DEFAULT: if.end4.i:
+// DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT]]
+// DEFAULT: [[IF_END4_I]]:
// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
-// DEFAULT: for.body.i:
-// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]]
+// DEFAULT: [[FOR_BODY_I]]:
+// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -2843,32 +3233,33 @@ extern "C" __device__ float test_jnf(int x, float y) {
// DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
-// DEFAULT: _ZL2jnid.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
+// DEFAULT: [[_ZL2JNID_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// DEFAULT-NEXT: ret double [[RETVAL_0_I]]
//
-// FINITEONLY-LABEL: @test_jn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_jn(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// FINITEONLY-NEXT: ]
-// FINITEONLY: if.then.i:
-// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL2JNID_EXIT:%.*]]
-// FINITEONLY: if.then2.i:
+// FINITEONLY: [[IF_THEN_I]]:
+// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT:.*]]
+// FINITEONLY: [[IF_THEN2_I]]:
// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL2JNID_EXIT]]
-// FINITEONLY: if.end4.i:
+// FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT]]
+// FINITEONLY: [[IF_END4_I]]:
// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
-// FINITEONLY: for.body.i:
-// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]]
+// FINITEONLY: [[FOR_BODY_I]]:
+// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]]
@@ -2876,32 +3267,33 @@ extern "C" __device__ float test_jnf(int x, float y) {
// FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]]
// FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
-// FINITEONLY: _ZL2jnid.exit:
-// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
+// FINITEONLY: [[_ZL2JNID_EXIT]]:
+// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// FINITEONLY-NEXT: ret double [[RETVAL_0_I]]
//
-// APPROX-LABEL: @test_jn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// APPROX-LABEL: define dso_local double @test_jn(
+// APPROX-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// APPROX-NEXT: ]
-// APPROX: if.then.i:
-// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL2JNID_EXIT:%.*]]
-// APPROX: if.then2.i:
+// APPROX: [[IF_THEN_I]]:
+// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT: br label %[[_ZL2JNID_EXIT:.*]]
+// APPROX: [[IF_THEN2_I]]:
// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL2JNID_EXIT]]
-// APPROX: if.end4.i:
+// APPROX-NEXT: br label %[[_ZL2JNID_EXIT]]
+// APPROX: [[IF_END4_I]]:
// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
-// APPROX: for.body.i:
-// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]]
+// APPROX: [[FOR_BODY_I]]:
+// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -2909,32 +3301,33 @@ extern "C" __device__ float test_jnf(int x, float y) {
// APPROX-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
-// APPROX: _ZL2jnid.exit:
-// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
+// APPROX: [[_ZL2JNID_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// APPROX-NEXT: ret double [[RETVAL_0_I]]
//
-// NCRDIV-LABEL: @test_jn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// NCRDIV-LABEL: define dso_local double @test_jn(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// NCRDIV-NEXT: ]
-// NCRDIV: if.then.i:
-// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL2JNID_EXIT:%.*]]
-// NCRDIV: if.then2.i:
+// NCRDIV: [[IF_THEN_I]]:
+// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT:.*]]
+// NCRDIV: [[IF_THEN2_I]]:
// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL2JNID_EXIT]]
-// NCRDIV: if.end4.i:
+// NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT]]
+// NCRDIV: [[IF_END4_I]]:
// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
-// NCRDIV: for.body.i:
-// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]]
+// NCRDIV: [[FOR_BODY_I]]:
+// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -2942,32 +3335,33 @@ extern "C" __device__ float test_jnf(int x, float y) {
// NCRDIV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]]
-// NCRDIV: _ZL2jnid.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]]
+// NCRDIV: [[_ZL2JNID_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// NCRDIV-NEXT: ret double [[RETVAL_0_I]]
//
-// AMDGCNSPIRV-LABEL: @test_jn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func double @test_jn(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT:%.*]]
-// AMDGCNSPIRV: if.then2.i:
+// AMDGCNSPIRV: [[IF_THEN_I]]:
+// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL2JNID_EXIT:.*]]
+// AMDGCNSPIRV: [[IF_THEN2_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT]]
-// AMDGCNSPIRV: if.end4.i:
+// AMDGCNSPIRV-NEXT: br label %[[_ZL2JNID_EXIT]]
+// AMDGCNSPIRV: [[IF_END4_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
-// AMDGCNSPIRV: for.body.i:
-// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]]
+// AMDGCNSPIRV: [[FOR_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -2975,158 +3369,183 @@ extern "C" __device__ float test_jnf(int x, float y) {
// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]]
-// AMDGCNSPIRV: _ZL2jnid.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL2JNID_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]]
//
extern "C" __device__ double test_jn(int x, double y) {
return jn(x, y);
}
-// DEFAULT-LABEL: @test_ldexpf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_ldexpf(
+// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_ldexpf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_ldexpf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_ldexpf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_ldexpf(
+// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_ldexpf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_ldexpf(
+// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_ldexpf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_ldexpf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_ldexpf(float x, int y) {
return ldexpf(x, y);
}
-// DEFAULT-LABEL: @test_ldexp(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_ldexp(
+// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_ldexp(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_ldexp(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_ldexp(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_ldexp(
+// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_ldexp(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_ldexp(
+// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_ldexp(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_ldexp(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_ldexp(double x, int y) {
return ldexp(x, y);
}
-// DEFAULT-LABEL: @test_lgammaf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_lgammaf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_lgammaf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_lgammaf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_lgammaf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_lgammaf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_lgammaf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_lgammaf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lgammaf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_lgammaf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_lgammaf(float x) {
return lgammaf(x);
}
-// DEFAULT-LABEL: @test_lgamma(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_lgamma(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_lgamma(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_lgamma(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_lgamma(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_lgamma(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_lgamma(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_lgamma(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lgamma(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_lgamma(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_lgamma(double x) {
return lgamma(x);
}
-// DEFAULT-LABEL: @test_llrintf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_llrintf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_llrintf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_llrintf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_llrintf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_llrintf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_llrintf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_llrintf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_llrintf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrintf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3134,33 +3553,38 @@ extern "C" __device__ long long int test_llrintf(float x) {
return llrintf(x);
}
-// DEFAULT-LABEL: @test_llrint(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_llrint(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_llrint(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_llrint(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_llrint(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_llrint(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_llrint(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_llrint(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_llrint(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrint(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3168,33 +3592,38 @@ extern "C" __device__ long long int test_llrint(double x) {
return llrint(x);
}
-// DEFAULT-LABEL: @test_llroundf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_llroundf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_llroundf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_llroundf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_llroundf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_llroundf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_llroundf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_llroundf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_llroundf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llroundf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3202,33 +3631,38 @@ extern "C" __device__ long long int test_llroundf(float x) {
return llroundf(x);
}
-// DEFAULT-LABEL: @test_llround(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_llround(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_llround(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_llround(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_llround(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_llround(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_llround(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_llround(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_llround(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llround(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3236,294 +3670,344 @@ extern "C" __device__ long long int test_llround(double x) {
return llround(x);
}
-// DEFAULT-LABEL: @test_log10f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_log10f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_log10f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log10f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_log10f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_log10f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_log10f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_log10f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_log10f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log10f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_log10f(float x) {
return log10f(x);
}
-// DEFAULT-LABEL: @test_log10(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_log10(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_log10(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log10(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_log10(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_log10(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_log10(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_log10(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_log10(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log10(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_log10(double x) {
return log10(x);
}
-// DEFAULT-LABEL: @test_log1pf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_log1pf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_log1pf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log1pf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_log1pf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_log1pf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_log1pf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_log1pf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_log1pf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log1pf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_log1pf(float x) {
return log1pf(x);
}
-// DEFAULT-LABEL: @test_log1p(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_log1p(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_log1p(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log1p(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_log1p(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_log1p(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_log1p(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_log1p(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_log1p(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log1p(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_log1p(double x) {
return log1p(x);
}
-// DEFAULT-LABEL: @test_log2f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_log2f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_log2f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log2.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log2f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log2.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_log2f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_log2f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_log2f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_log2f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_log2f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log2.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log2f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log2.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_log2f(float x) {
return log2f(x);
}
-// DEFAULT-LABEL: @test_log2(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_log2(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_log2(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log2(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_log2(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_log2(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_log2(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_log2(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_log2(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log2(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_log2(double x) {
return log2(x);
}
-// DEFAULT-LABEL: @test_logbf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_logbf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_logbf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_logbf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_logbf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_logbf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_logbf(float x) {
return logbf(x);
}
-// DEFAULT-LABEL: @test_logb(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_logb(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_logb(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_logb(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_logb(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_logb(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_logb(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_logb(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_logb(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_logb(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_logb(double x) {
return logb(x);
}
-// DEFAULT-LABEL: @test_logf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_logf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_logf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_logf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_logf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_logf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_logf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_logf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_logf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_logf(float x) {
return logf(x);
}
-// DEFAULT-LABEL: @test_lrintf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_lrintf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_lrintf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_lrintf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_lrintf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_lrintf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_lrintf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_lrintf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lrintf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrintf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3531,33 +4015,38 @@ extern "C" __device__ long int test_lrintf(float x) {
return lrintf(x);
}
-// DEFAULT-LABEL: @test_lrint(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_lrint(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_lrint(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_lrint(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_lrint(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_lrint(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_lrint(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_lrint(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lrint(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrint(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3565,33 +4054,38 @@ extern "C" __device__ long int test_lrint(double x) {
return lrint(x);
}
-// DEFAULT-LABEL: @test_lroundf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_lroundf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_lroundf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_lroundf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_lroundf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_lroundf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_lroundf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_lroundf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lroundf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lroundf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3599,33 +4093,38 @@ extern "C" __device__ long int test_lroundf(float x) {
return lroundf(x);
}
-// DEFAULT-LABEL: @test_lround(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local i64 @test_lround(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
-// FINITEONLY-LABEL: @test_lround(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local i64 @test_lround(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
-// APPROX-LABEL: @test_lround(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local i64 @test_lround(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
-// NCRDIV-LABEL: @test_lround(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local i64 @test_lround(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
-// AMDGCNSPIRV-LABEL: @test_lround(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lround(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
@@ -3633,54 +4132,59 @@ extern "C" __device__ long int test_lround(double x) {
return lround(x);
}
-// DEFAULT-LABEL: @test_modff(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef float @test_modff(
+// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
-// DEFAULT-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]]
+// DEFAULT-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_modff(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_modff(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
-// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]]
+// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_modff(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef float @test_modff(
+// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
-// APPROX-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]]
+// APPROX-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_modff(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef float @test_modff(
+// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
-// NCRDIV-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17:![0-9]+]]
+// NCRDIV-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_modff(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_modff(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15:[0-9]+]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
-// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
@@ -3688,54 +4192,59 @@ extern "C" __device__ float test_modff(float x, float* y) {
return modff(x, y);
}
-// DEFAULT-LABEL: @test_modf(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef double @test_modf(
+// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
-// DEFAULT-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]]
+// DEFAULT-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_modf(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_modf(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
-// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]]
+// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_modf(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef double @test_modf(
+// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
-// APPROX-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]]
+// APPROX-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_modf(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef double @test_modf(
+// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19:![0-9]+]]
-// NCRDIV-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19:![0-9]+]]
+// NCRDIV-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_modf(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_modf(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19:![0-9]+]]
-// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
@@ -3743,325 +4252,330 @@ extern "C" __device__ double test_modf(double x, double* y) {
return modf(x, y);
}
-// DEFAULT-LABEL: @test_nanf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-LABEL: define dso_local float @test_nanf(
+// DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// DEFAULT: while.cond.i14.i.i.preheader:
-// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// DEFAULT: if.then.i.i:
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// DEFAULT: [[IF_THEN_I_I]]:
// DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// DEFAULT-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// DEFAULT-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// DEFAULT-NEXT: ]
-// DEFAULT: while.cond.i.i.i.preheader:
-// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]:
+// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// DEFAULT: if.then5.i.i:
-// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// DEFAULT: [[IF_THEN5_I_I]]:
+// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// DEFAULT: while.body.i31.i.i:
-// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// DEFAULT: [[WHILE_BODY_I31_I_I]]:
+// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// DEFAULT-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// DEFAULT-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// DEFAULT: if.else.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// DEFAULT: [[IF_ELSE_I_I_I]]:
// DEFAULT-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// DEFAULT: if.else17.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// DEFAULT: [[IF_ELSE17_I_I_I]]:
// DEFAULT-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// DEFAULT: if.end31.i.i.i:
-// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// DEFAULT: [[IF_END31_I_I_I]]:
+// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// DEFAULT: while.body.i.i.i:
-// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// DEFAULT: [[WHILE_BODY_I_I_I]]:
+// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// DEFAULT-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// DEFAULT: if.then.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// DEFAULT: [[IF_THEN_I_I_I]]:
// DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// DEFAULT-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// DEFAULT: while.body.i18.i.i:
-// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// DEFAULT: [[WHILE_BODY_I18_I_I]]:
+// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// DEFAULT-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// DEFAULT: if.then.i21.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// DEFAULT: [[IF_THEN_I21_I_I]]:
// DEFAULT-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// DEFAULT-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// DEFAULT: _ZL4nanfPKc.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// DEFAULT: [[_ZL4NANFPKC_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
// DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
// DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
// DEFAULT-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
// DEFAULT-NEXT: ret float [[TMP16]]
//
-// FINITEONLY-LABEL: @test_nanf(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf(
+// FINITEONLY-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret float poison
//
-// APPROX-LABEL: @test_nanf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// APPROX-LABEL: define dso_local float @test_nanf(
+// APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// APPROX: while.cond.i14.i.i.preheader:
-// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// APPROX: if.then.i.i:
+// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// APPROX: [[IF_THEN_I_I]]:
// APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// APPROX-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// APPROX-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// APPROX-NEXT: ]
-// APPROX: while.cond.i.i.i.preheader:
-// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX: [[WHILE_COND_I_I_I_PREHEADER]]:
+// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// APPROX: if.then5.i.i:
-// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// APPROX: [[IF_THEN5_I_I]]:
+// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// APPROX: while.body.i31.i.i:
-// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// APPROX: [[WHILE_BODY_I31_I_I]]:
+// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// APPROX-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// APPROX-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// APPROX: if.else.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// APPROX: [[IF_ELSE_I_I_I]]:
// APPROX-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// APPROX: if.else17.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// APPROX: [[IF_ELSE17_I_I_I]]:
// APPROX-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// APPROX: if.end31.i.i.i:
-// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// APPROX: [[IF_END31_I_I_I]]:
+// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// APPROX: while.body.i.i.i:
-// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// APPROX: [[WHILE_BODY_I_I_I]]:
+// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// APPROX-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// APPROX: if.then.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// APPROX: [[IF_THEN_I_I_I]]:
// APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// APPROX-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// APPROX: while.body.i18.i.i:
-// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// APPROX: [[WHILE_BODY_I18_I_I]]:
+// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// APPROX-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// APPROX: if.then.i21.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// APPROX: [[IF_THEN_I21_I_I]]:
// APPROX-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// APPROX-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// APPROX-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// APPROX: _ZL4nanfPKc.exit:
-// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// APPROX: [[_ZL4NANFPKC_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
// APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
// APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
// APPROX-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
// APPROX-NEXT: ret float [[TMP16]]
//
-// NCRDIV-LABEL: @test_nanf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-LABEL: define dso_local float @test_nanf(
+// NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// NCRDIV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// NCRDIV: while.cond.i14.i.i.preheader:
-// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// NCRDIV: if.then.i.i:
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// NCRDIV: [[IF_THEN_I_I]]:
// NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// NCRDIV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// NCRDIV-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// NCRDIV-NEXT: ]
-// NCRDIV: while.cond.i.i.i.preheader:
-// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]:
+// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// NCRDIV: if.then5.i.i:
-// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// NCRDIV: [[IF_THEN5_I_I]]:
+// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// NCRDIV: while.body.i31.i.i:
-// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// NCRDIV: [[WHILE_BODY_I31_I_I]]:
+// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// NCRDIV-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// NCRDIV-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// NCRDIV: if.else.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// NCRDIV: [[IF_ELSE_I_I_I]]:
// NCRDIV-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// NCRDIV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// NCRDIV: if.else17.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// NCRDIV: [[IF_ELSE17_I_I_I]]:
// NCRDIV-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// NCRDIV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// NCRDIV: if.end31.i.i.i:
-// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// NCRDIV: [[IF_END31_I_I_I]]:
+// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// NCRDIV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// NCRDIV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// NCRDIV: while.body.i.i.i:
-// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// NCRDIV: [[WHILE_BODY_I_I_I]]:
+// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// NCRDIV-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// NCRDIV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// NCRDIV: if.then.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// NCRDIV: [[IF_THEN_I_I_I]]:
// NCRDIV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// NCRDIV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// NCRDIV: while.body.i18.i.i:
-// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// NCRDIV: [[WHILE_BODY_I18_I_I]]:
+// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// NCRDIV-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// NCRDIV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// NCRDIV: if.then.i21.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// NCRDIV: [[IF_THEN_I21_I_I]]:
// NCRDIV-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// NCRDIV-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// NCRDIV: _ZL4nanfPKc.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// NCRDIV: [[_ZL4NANFPKC_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
// NCRDIV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
// NCRDIV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
// NCRDIV-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
// NCRDIV-NEXT: ret float [[TMP16]]
//
-// AMDGCNSPIRV-LABEL: @test_nanf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-LABEL: define spir_func float @test_nanf(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
-// AMDGCNSPIRV: if.then.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]]
+// AMDGCNSPIRV: [[IF_THEN_I_I]]:
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
+// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [
+// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then5.i.i:
-// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV: [[IF_THEN5_I_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
-// AMDGCNSPIRV: while.body.i32.i.i:
-// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// AMDGCNSPIRV: if.else.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97
// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// AMDGCNSPIRV: if.else17.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE17_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65
// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
-// AMDGCNSPIRV: if.end31.i.i.i:
-// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]]
+// AMDGCNSPIRV: [[IF_END31_I_I_I]]:
+// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4
// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
-// AMDGCNSPIRV: while.cond.i.i.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV: [[WHILE_COND_I_I_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]]
-// AMDGCNSPIRV: while.body.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8
// AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
@@ -4071,14 +4585,14 @@ extern "C" __device__ double test_modf(double x, double* y) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]]
-// AMDGCNSPIRV: while.cond.i14.i.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]]
+// AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]]
-// AMDGCNSPIRV: while.body.i18.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10
// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
@@ -4088,9 +4602,9 @@ extern "C" __device__ double test_modf(double x, double* y) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
-// AMDGCNSPIRV: _ZL4nanfPKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
+// AMDGCNSPIRV: [[_ZL4NANFPKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], %[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ]
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
@@ -4101,322 +4615,327 @@ extern "C" __device__ float test_nanf(const char *tag) {
return nanf(tag);
}
-// DEFAULT-LABEL: @test_nan(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-LABEL: define dso_local double @test_nan(
+// DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// DEFAULT: while.cond.i14.i.i.preheader:
-// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// DEFAULT: if.then.i.i:
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// DEFAULT: [[IF_THEN_I_I]]:
// DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// DEFAULT-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// DEFAULT-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// DEFAULT-NEXT: ]
-// DEFAULT: while.cond.i.i.i.preheader:
-// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]:
+// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// DEFAULT: if.then5.i.i:
-// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// DEFAULT: [[IF_THEN5_I_I]]:
+// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// DEFAULT: while.body.i31.i.i:
-// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// DEFAULT: [[WHILE_BODY_I31_I_I]]:
+// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// DEFAULT-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// DEFAULT-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// DEFAULT: if.else.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// DEFAULT: [[IF_ELSE_I_I_I]]:
// DEFAULT-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// DEFAULT: if.else17.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// DEFAULT: [[IF_ELSE17_I_I_I]]:
// DEFAULT-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// DEFAULT: if.end31.i.i.i:
-// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// DEFAULT: [[IF_END31_I_I_I]]:
+// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// DEFAULT: while.body.i.i.i:
-// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// DEFAULT: [[WHILE_BODY_I_I_I]]:
+// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// DEFAULT-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// DEFAULT: if.then.i.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// DEFAULT: [[IF_THEN_I_I_I]]:
// DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// DEFAULT-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// DEFAULT: while.body.i18.i.i:
-// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// DEFAULT: [[WHILE_BODY_I18_I_I]]:
+// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// DEFAULT-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
-// DEFAULT: if.then.i21.i.i:
+// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// DEFAULT: [[IF_THEN_I21_I_I]]:
// DEFAULT-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// DEFAULT-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// DEFAULT: _ZL3nanPKc.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// DEFAULT: [[_ZL3NANPKC_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
// DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
// DEFAULT-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
// DEFAULT-NEXT: ret double [[TMP16]]
//
-// FINITEONLY-LABEL: @test_nan(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_nan(
+// FINITEONLY-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret double poison
//
-// APPROX-LABEL: @test_nan(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// APPROX-LABEL: define dso_local double @test_nan(
+// APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// APPROX: while.cond.i14.i.i.preheader:
-// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// APPROX: if.then.i.i:
+// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// APPROX: [[IF_THEN_I_I]]:
// APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// APPROX-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// APPROX-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// APPROX-NEXT: ]
-// APPROX: while.cond.i.i.i.preheader:
-// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX: [[WHILE_COND_I_I_I_PREHEADER]]:
+// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// APPROX: if.then5.i.i:
-// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// APPROX: [[IF_THEN5_I_I]]:
+// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// APPROX: while.body.i31.i.i:
-// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// APPROX: [[WHILE_BODY_I31_I_I]]:
+// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// APPROX-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// APPROX-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// APPROX: if.else.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// APPROX: [[IF_ELSE_I_I_I]]:
// APPROX-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// APPROX: if.else17.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// APPROX: [[IF_ELSE17_I_I_I]]:
// APPROX-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// APPROX: if.end31.i.i.i:
-// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// APPROX: [[IF_END31_I_I_I]]:
+// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// APPROX: while.body.i.i.i:
-// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// APPROX: [[WHILE_BODY_I_I_I]]:
+// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// APPROX-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// APPROX: if.then.i.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// APPROX: [[IF_THEN_I_I_I]]:
// APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// APPROX-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// APPROX: while.body.i18.i.i:
-// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// APPROX: [[WHILE_BODY_I18_I_I]]:
+// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// APPROX-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
-// APPROX: if.then.i21.i.i:
+// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// APPROX: [[IF_THEN_I21_I_I]]:
// APPROX-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// APPROX-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// APPROX-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// APPROX: _ZL3nanPKc.exit:
-// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// APPROX: [[_ZL3NANPKC_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
// APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
// APPROX-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
// APPROX-NEXT: ret double [[TMP16]]
//
-// NCRDIV-LABEL: @test_nan(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-LABEL: define dso_local double @test_nan(
+// NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// NCRDIV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
-// NCRDIV: while.cond.i14.i.i.preheader:
-// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]]
+// NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]:
+// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
-// NCRDIV: if.then.i.i:
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]]
+// NCRDIV: [[IF_THEN_I_I]]:
// NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
-// NCRDIV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// NCRDIV-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
+// NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [
+// NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// NCRDIV-NEXT: ]
-// NCRDIV: while.cond.i.i.i.preheader:
-// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]:
+// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
-// NCRDIV: if.then5.i.i:
-// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]]
+// NCRDIV: [[IF_THEN5_I_I]]:
+// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
-// NCRDIV: while.body.i31.i.i:
-// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
-// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]]
+// NCRDIV: [[WHILE_BODY_I31_I_I]]:
+// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// NCRDIV-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48
// NCRDIV-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
-// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// NCRDIV: if.else.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// NCRDIV: [[IF_ELSE_I_I_I]]:
// NCRDIV-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97
// NCRDIV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
-// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// NCRDIV: if.else17.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// NCRDIV: [[IF_ELSE17_I_I_I]]:
// NCRDIV-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65
// NCRDIV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
-// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// NCRDIV: if.end31.i.i.i:
-// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// NCRDIV: [[IF_END31_I_I_I]]:
+// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// NCRDIV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
// NCRDIV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
// NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
-// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
-// NCRDIV: while.body.i.i.i:
-// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
+// NCRDIV: [[WHILE_BODY_I_I_I]]:
+// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ]
// NCRDIV-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8
// NCRDIV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
-// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// NCRDIV: if.then.i.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// NCRDIV: [[IF_THEN_I_I_I]]:
// NCRDIV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
// NCRDIV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
// NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
// NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
-// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
-// NCRDIV: while.body.i18.i.i:
-// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
-// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
+// NCRDIV: [[WHILE_BODY_I18_I_I]]:
+// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ]
// NCRDIV-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48
// NCRDIV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
-// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
-// NCRDIV: if.then.i21.i.i:
+// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// NCRDIV: [[IF_THEN_I21_I_I]]:
// NCRDIV-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
// NCRDIV-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
// NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
// NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
-// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]]
// NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
-// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
-// NCRDIV: _ZL3nanPKc.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
+// NCRDIV: [[_ZL3NANPKC_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ]
// NCRDIV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
// NCRDIV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
// NCRDIV-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
// NCRDIV-NEXT: ret double [[TMP16]]
//
-// AMDGCNSPIRV-LABEL: @test_nan(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-LABEL: define spir_func double @test_nan(
+// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
-// AMDGCNSPIRV: if.then.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]]
+// AMDGCNSPIRV: [[IF_THEN_I_I]]:
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]]
-// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I_I]]
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
+// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [
+// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]]
+// AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then5.i.i:
-// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV: [[IF_THEN5_I_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
-// AMDGCNSPIRV: while.body.i32.i.i:
-// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]:
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ]
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
-// AMDGCNSPIRV: if.else.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97
// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
-// AMDGCNSPIRV: if.else17.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]]
+// AMDGCNSPIRV: [[IF_ELSE17_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65
// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
-// AMDGCNSPIRV: if.end31.i.i.i:
-// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]]
+// AMDGCNSPIRV: [[IF_END31_I_I_I]]:
+// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4
// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1
-// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
-// AMDGCNSPIRV: while.cond.i.i.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV: [[WHILE_COND_I_I_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]]
-// AMDGCNSPIRV: while.body.i.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8
// AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
@@ -4426,14 +4945,14 @@ extern "C" __device__ float test_nanf(const char *tag) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]]
-// AMDGCNSPIRV: while.cond.i14.i.i:
-// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]]
+// AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA5]]
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]]
-// AMDGCNSPIRV: while.body.i18.i.i:
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]]
+// AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]:
// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48
// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10
// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
@@ -4443,9 +4962,9 @@ extern "C" __device__ float test_nanf(const char *tag) {
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64
// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]]
// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]]
-// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
-// AMDGCNSPIRV: _ZL3nanPKc.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
+// AMDGCNSPIRV: [[_ZL3NANPKC_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], %[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ]
// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
// AMDGCNSPIRV-NEXT: [[TMP12:%.*]] = bitcast i64 [[BF_SET9_I]] to double
@@ -4455,958 +4974,1093 @@ extern "C" __device__ double test_nan(const char *tag) {
return nan(tag);
}
-// DEFAULT-LABEL: @test_nanf_emptystr(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef float @test_nanf_emptystr(
+// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: ret float 0x7FF8000000000000
//
-// FINITEONLY-LABEL: @test_nanf_emptystr(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf_emptystr(
+// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret float poison
//
-// APPROX-LABEL: @test_nanf_emptystr(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef float @test_nanf_emptystr(
+// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: ret float 0x7FF8000000000000
//
-// NCRDIV-LABEL: @test_nanf_emptystr(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef float @test_nanf_emptystr(
+// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: ret float 0x7FF8000000000000
//
-// AMDGCNSPIRV-LABEL: @test_nanf_emptystr(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nanf_emptystr(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000
//
extern "C" __device__ float test_nanf_emptystr() {
return nanf("");
}
-// DEFAULT-LABEL: @test_nan_emptystr(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef double @test_nan_emptystr(
+// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: ret double 0x7FF8000000000000
//
-// FINITEONLY-LABEL: @test_nan_emptystr(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_nan_emptystr(
+// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret double poison
//
-// APPROX-LABEL: @test_nan_emptystr(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef double @test_nan_emptystr(
+// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: ret double 0x7FF8000000000000
//
-// NCRDIV-LABEL: @test_nan_emptystr(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef double @test_nan_emptystr(
+// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: ret double 0x7FF8000000000000
//
-// AMDGCNSPIRV-LABEL: @test_nan_emptystr(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nan_emptystr(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000
//
extern "C" __device__ double test_nan_emptystr() {
return nan("");
}
-// DEFAULT-LABEL: @test_nanf_fill(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef float @test_nanf_fill(
+// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: ret float 0x7FF8000000000000
//
-// FINITEONLY-LABEL: @test_nanf_fill(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf_fill(
+// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret float poison
//
-// APPROX-LABEL: @test_nanf_fill(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef float @test_nanf_fill(
+// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: ret float 0x7FF8000000000000
//
-// NCRDIV-LABEL: @test_nanf_fill(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef float @test_nanf_fill(
+// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: ret float 0x7FF8000000000000
//
-// AMDGCNSPIRV-LABEL: @test_nanf_fill(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nanf_fill(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000
//
extern "C" __device__ float test_nanf_fill() {
return nanf("0x456");
}
-// DEFAULT-LABEL: @test_nan_fill(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef double @test_nan_fill(
+// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: ret double 0x7FF8000000000000
//
-// FINITEONLY-LABEL: @test_nan_fill(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_nan_fill(
+// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret double poison
//
-// APPROX-LABEL: @test_nan_fill(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef double @test_nan_fill(
+// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: ret double 0x7FF8000000000000
//
-// NCRDIV-LABEL: @test_nan_fill(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef double @test_nan_fill(
+// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: ret double 0x7FF8000000000000
//
-// AMDGCNSPIRV-LABEL: @test_nan_fill(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nan_fill(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000
//
extern "C" __device__ double test_nan_fill() {
return nan("0x123");
}
-// DEFAULT-LABEL: @test_nearbyintf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_nearbyintf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_nearbyintf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.nearbyint.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_nearbyintf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.nearbyint.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_nearbyintf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_nearbyintf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_nearbyintf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_nearbyintf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_nearbyintf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nearbyintf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_nearbyintf(float x) {
return nearbyintf(x);
}
-// DEFAULT-LABEL: @test_nearbyint(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_nearbyint(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_nearbyint(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.nearbyint.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_nearbyint(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.nearbyint.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_nearbyint(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_nearbyint(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_nearbyint(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_nearbyint(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_nearbyint(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nearbyint(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_nearbyint(double x) {
return nearbyint(x);
}
-// DEFAULT-LABEL: @test_nextafterf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_nextafterf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_nextafterf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_nextafterf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_nextafterf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_nextafterf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_nextafterf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_nextafterf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_nextafterf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nextafterf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_nextafterf(float x, float y) {
return nextafterf(x, y);
}
-// DEFAULT-LABEL: @test_nextafter(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_nextafter(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_nextafter(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_nextafter(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_nextafter(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_nextafter(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_nextafter(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_nextafter(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_nextafter(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nextafter(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_nextafter(double x, double y) {
return nextafter(x, y);
}
-// DEFAULT-LABEL: @test_norm3df(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_norm3df(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_norm3df(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm3df(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_norm3df(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_norm3df(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_norm3df(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_norm3df(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_norm3df(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm3df(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_norm3df(float x, float y, float z) {
return norm3df(x, y, z);
}
-// DEFAULT-LABEL: @test_norm3d(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_norm3d(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_norm3d(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm3d(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_norm3d(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_norm3d(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_norm3d(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_norm3d(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_norm3d(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm3d(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_norm3d(double x, double y, double z) {
return norm3d(x, y, z);
}
-// DEFAULT-LABEL: @test_norm4df(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_norm4df(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_norm4df(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm4df(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_norm4df(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_norm4df(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_norm4df(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_norm4df(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_norm4df(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm4df(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_norm4df(float x, float y, float z, float w) {
return norm4df(x, y, z, w);
}
-// DEFAULT-LABEL: @test_norm4d(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_norm4d(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_norm4d(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm4d(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_norm4d(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_norm4d(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_norm4d(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_norm4d(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_norm4d(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm4d(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_norm4d(double x, double y, double z, double w) {
return norm4d(x, y, z, w);
}
-// DEFAULT-LABEL: @test_normcdff(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_normcdff(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_normcdff(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdff(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_normcdff(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_normcdff(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_normcdff(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_normcdff(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_normcdff(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdff(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_normcdff(float x) {
return normcdff(x);
}
-// DEFAULT-LABEL: @test_normcdf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_normcdf(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_normcdf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdf(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_normcdf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_normcdf(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_normcdf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_normcdf(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_normcdf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdf(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_normcdf(double x) {
return normcdf(x);
}
-// DEFAULT-LABEL: @test_normcdfinvf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_normcdfinvf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_normcdfinvf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdfinvf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_normcdfinvf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_normcdfinvf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_normcdfinvf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_normcdfinvf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_normcdfinvf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdfinvf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_normcdfinvf(float x) {
return normcdfinvf(x);
}
-// DEFAULT-LABEL: @test_normcdfinv(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_normcdfinv(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_normcdfinv(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdfinv(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_normcdfinv(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_normcdfinv(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_normcdfinv(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_normcdfinv(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_normcdfinv(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdfinv(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_normcdfinv(double x) {
return normcdfinv(x);
}
-// DEFAULT-LABEL: @test_normf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// DEFAULT: while.body.i:
-// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// DEFAULT-LABEL: define dso_local float @test_normf(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// DEFAULT-NEXT: [[ENTRY:.*]]:
+// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// DEFAULT: [[WHILE_BODY_I]]:
+// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
-// DEFAULT: _ZL5normfiPKf.exit.loopexit:
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// DEFAULT: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]:
// DEFAULT-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
-// DEFAULT-NEXT: br label [[_ZL5NORMFIPKF_EXIT]]
-// DEFAULT: _ZL5normfiPKf.exit:
-// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// DEFAULT-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]]
+// DEFAULT: [[_ZL5NORMFIPKF_EXIT]]:
+// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
// DEFAULT-NEXT: ret float [[__R_0_I_LCSSA]]
//
-// FINITEONLY-LABEL: @test_normf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// FINITEONLY: while.body.i:
-// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_normf(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// FINITEONLY-NEXT: [[ENTRY:.*]]:
+// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// FINITEONLY: [[WHILE_BODY_I]]:
+// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]]
// FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]]
// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
-// FINITEONLY: _ZL5normfiPKf.exit.loopexit:
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// FINITEONLY: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]:
// FINITEONLY-NEXT: [[TMP1:%.*]] = tail call nnan ninf contract float @llvm.sqrt.f32(float [[ADD_I]])
-// FINITEONLY-NEXT: br label [[_ZL5NORMFIPKF_EXIT]]
-// FINITEONLY: _ZL5normfiPKf.exit:
-// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// FINITEONLY-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]]
+// FINITEONLY: [[_ZL5NORMFIPKF_EXIT]]:
+// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
// FINITEONLY-NEXT: ret float [[__R_0_I_LCSSA]]
//
-// APPROX-LABEL: @test_normf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// APPROX: while.body.i:
-// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// APPROX-LABEL: define dso_local float @test_normf(
+// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// APPROX-NEXT: [[ENTRY:.*]]:
+// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// APPROX: [[WHILE_BODY_I]]:
+// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
-// APPROX: _ZL5normfiPKf.exit.loopexit:
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// APPROX: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]:
// APPROX-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
-// APPROX-NEXT: br label [[_ZL5NORMFIPKF_EXIT]]
-// APPROX: _ZL5normfiPKf.exit:
-// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// APPROX-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]]
+// APPROX: [[_ZL5NORMFIPKF_EXIT]]:
+// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
// APPROX-NEXT: ret float [[__R_0_I_LCSSA]]
//
-// NCRDIV-LABEL: @test_normf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// NCRDIV: while.body.i:
-// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// NCRDIV-LABEL: define dso_local float @test_normf(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NCRDIV-NEXT: [[ENTRY:.*]]:
+// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// NCRDIV: [[WHILE_BODY_I]]:
+// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// NCRDIV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
-// NCRDIV: _ZL5normfiPKf.exit.loopexit:
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// NCRDIV: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]:
// NCRDIV-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
-// NCRDIV-NEXT: br label [[_ZL5NORMFIPKF_EXIT]]
-// NCRDIV: _ZL5normfiPKf.exit:
-// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// NCRDIV-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]]
+// NCRDIV: [[_ZL5NORMFIPKF_EXIT]]:
+// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
// NCRDIV-NEXT: ret float [[__R_0_I_LCSSA]]
//
-// AMDGCNSPIRV-LABEL: @test_normf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// AMDGCNSPIRV: while.body.i:
-// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// AMDGCNSPIRV-LABEL: define spir_func float @test_normf(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4
// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
-// AMDGCNSPIRV: _ZL5normfiPKf.exit.loopexit:
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract addrspace(4) float @llvm.sqrt.f32(float [[ADD_I]])
-// AMDGCNSPIRV-NEXT: br label [[_ZL5NORMFIPKF_EXIT]]
-// AMDGCNSPIRV: _ZL5normfiPKf.exit:
-// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]]
+// AMDGCNSPIRV: [[_ZL5NORMFIPKF_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
// AMDGCNSPIRV-NEXT: ret float [[__R_0_I_LCSSA]]
//
extern "C" __device__ float test_normf(int x, const float *y) {
return normf(x, y);
}
-// DEFAULT-LABEL: @test_norm(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// DEFAULT: while.body.i:
-// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// DEFAULT-LABEL: define dso_local double @test_norm(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// DEFAULT-NEXT: [[ENTRY:.*]]:
+// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// DEFAULT: [[WHILE_BODY_I]]:
+// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
-// DEFAULT: _ZL4normiPKd.exit.loopexit:
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// DEFAULT: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]:
// DEFAULT-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
-// DEFAULT-NEXT: br label [[_ZL4NORMIPKD_EXIT]]
-// DEFAULT: _ZL4normiPKd.exit:
-// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// DEFAULT-NEXT: br label %[[_ZL4NORMIPKD_EXIT]]
+// DEFAULT: [[_ZL4NORMIPKD_EXIT]]:
+// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
// DEFAULT-NEXT: ret double [[__R_0_I_LCSSA]]
//
-// FINITEONLY-LABEL: @test_norm(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// FINITEONLY: while.body.i:
-// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_norm(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// FINITEONLY-NEXT: [[ENTRY:.*]]:
+// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// FINITEONLY: [[WHILE_BODY_I]]:
+// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]]
// FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]]
// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
-// FINITEONLY: _ZL4normiPKd.exit.loopexit:
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// FINITEONLY: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]:
// FINITEONLY-NEXT: [[TMP1:%.*]] = tail call nnan ninf contract double @llvm.sqrt.f64(double [[ADD_I]])
-// FINITEONLY-NEXT: br label [[_ZL4NORMIPKD_EXIT]]
-// FINITEONLY: _ZL4normiPKd.exit:
-// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// FINITEONLY-NEXT: br label %[[_ZL4NORMIPKD_EXIT]]
+// FINITEONLY: [[_ZL4NORMIPKD_EXIT]]:
+// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
// FINITEONLY-NEXT: ret double [[__R_0_I_LCSSA]]
//
-// APPROX-LABEL: @test_norm(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// APPROX: while.body.i:
-// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// APPROX-LABEL: define dso_local double @test_norm(
+// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// APPROX-NEXT: [[ENTRY:.*]]:
+// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// APPROX: [[WHILE_BODY_I]]:
+// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
-// APPROX: _ZL4normiPKd.exit.loopexit:
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// APPROX: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]:
// APPROX-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
-// APPROX-NEXT: br label [[_ZL4NORMIPKD_EXIT]]
-// APPROX: _ZL4normiPKd.exit:
-// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// APPROX-NEXT: br label %[[_ZL4NORMIPKD_EXIT]]
+// APPROX: [[_ZL4NORMIPKD_EXIT]]:
+// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
// APPROX-NEXT: ret double [[__R_0_I_LCSSA]]
//
-// NCRDIV-LABEL: @test_norm(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// NCRDIV: while.body.i:
-// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// NCRDIV-LABEL: define dso_local double @test_norm(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NCRDIV-NEXT: [[ENTRY:.*]]:
+// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// NCRDIV: [[WHILE_BODY_I]]:
+// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]]
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// NCRDIV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
-// NCRDIV: _ZL4normiPKd.exit.loopexit:
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// NCRDIV: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]:
// NCRDIV-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
-// NCRDIV-NEXT: br label [[_ZL4NORMIPKD_EXIT]]
-// NCRDIV: _ZL4normiPKd.exit:
-// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// NCRDIV-NEXT: br label %[[_ZL4NORMIPKD_EXIT]]
+// NCRDIV: [[_ZL4NORMIPKD_EXIT]]:
+// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
// NCRDIV-NEXT: ret double [[__R_0_I_LCSSA]]
//
-// AMDGCNSPIRV-LABEL: @test_norm(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// AMDGCNSPIRV: while.body.i:
-// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// AMDGCNSPIRV-LABEL: define spir_func double @test_norm(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8
// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
-// AMDGCNSPIRV: _ZL4normiPKd.exit.loopexit:
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract addrspace(4) double @llvm.sqrt.f64(double [[ADD_I]])
-// AMDGCNSPIRV-NEXT: br label [[_ZL4NORMIPKD_EXIT]]
-// AMDGCNSPIRV: _ZL4normiPKd.exit:
-// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL4NORMIPKD_EXIT]]
+// AMDGCNSPIRV: [[_ZL4NORMIPKD_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
// AMDGCNSPIRV-NEXT: ret double [[__R_0_I_LCSSA]]
//
extern "C" __device__ double test_norm(int x, const double *y) {
return norm(x, y);
}
-// DEFAULT-LABEL: @test_powf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_powf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_powf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_powf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_powf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_powf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_powf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_powf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_powf(float x, float y) {
return powf(x, y);
}
-// DEFAULT-LABEL: @test_pow(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_pow(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_pow(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_pow(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_pow(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_pow(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_pow(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_pow(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_pow(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_pow(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_pow(double x, double y) {
return pow(x, y);
}
-// DEFAULT-LABEL: @test_powif(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_powif(
+// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_powif(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powif(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_powif(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_powif(
+// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_powif(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_powif(
+// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_powif(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powif(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_powif(float x, int y) {
return powif(x, y);
}
-// DEFAULT-LABEL: @test_powi(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_powi(
+// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_powi(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_powi(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_powi(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_powi(
+// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_powi(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_powi(
+// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_powi(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_powi(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_powi(double x, int y) {
return powi(x, y);
}
-// DEFAULT-LABEL: @test_rcbrtf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_rcbrtf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rcbrtf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rcbrtf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rcbrtf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_rcbrtf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rcbrtf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_rcbrtf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rcbrtf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rcbrtf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_rcbrtf(float x) {
return rcbrtf(x);
}
-// DEFAULT-LABEL: @test_rcbrt(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_rcbrt(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rcbrt(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rcbrt(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rcbrt(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_rcbrt(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rcbrt(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_rcbrt(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rcbrt(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rcbrt(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_rcbrt(double x) {
return rcbrt(x);
}
-// DEFAULT-LABEL: @test_remainderf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_remainderf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_remainderf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_remainderf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_remainderf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_remainderf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_remainderf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_remainderf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_remainderf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remainderf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_remainderf(float x, float y) {
return remainderf(x, y);
}
-// DEFAULT-LABEL: @test_remainder(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_remainder(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_remainder(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_remainder(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_remainder(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_remainder(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_remainder(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_remainder(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_remainder(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remainder(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_remainder(double x, double y) {
return remainder(x, y);
}
-// DEFAULT-LABEL: @test_remquof(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef float @test_remquof(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_remquof(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_remquof(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_remquof(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef float @test_remquof(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_remquof(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef float @test_remquof(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]]
-// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]]
+// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_remquof(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remquof(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]]
-// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
@@ -5414,54 +6068,59 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) {
return remquof(x, y, z);
}
-// DEFAULT-LABEL: @test_remquo(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local noundef double @test_remquo(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_remquo(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_remquo(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_remquo(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local noundef double @test_remquo(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
-// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]]
+// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_remquo(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local noundef double @test_remquo(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]]
-// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]]
+// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_remquo(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remquo(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]]
-// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
@@ -5469,219 +6128,244 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) {
return remquo(x, y, z);
}
-// DEFAULT-LABEL: @test_rhypotf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_rhypotf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rhypotf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rhypotf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rhypotf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_rhypotf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rhypotf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_rhypotf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rhypotf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rhypotf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_rhypotf(float x, float y) {
return rhypotf(x, y);
}
-// DEFAULT-LABEL: @test_rhypot(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_rhypot(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rhypot(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rhypot(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rhypot(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_rhypot(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rhypot(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_rhypot(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rhypot(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rhypot(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_rhypot(double x, double y) {
return rhypot(x, y);
}
-// DEFAULT-LABEL: @test_rintf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_rintf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_rintf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rintf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_rintf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_rintf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_rintf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_rintf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_rintf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rintf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_rintf(float x) {
return rintf(x);
}
-// DEFAULT-LABEL: @test_rint(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_rint(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_rint(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rint(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_rint(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_rint(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_rint(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_rint(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_rint(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rint(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_rint(double x) {
return rint(x);
}
-// DEFAULT-LABEL: @test_rnormf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// DEFAULT: while.body.i:
-// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// DEFAULT-LABEL: define dso_local noundef float @test_rnormf(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*]]:
+// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// DEFAULT: [[WHILE_BODY_I]]:
+// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
-// DEFAULT: _ZL6rnormfiPKf.exit:
-// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// DEFAULT: [[_ZL6RNORMFIPKF_EXIT]]:
+// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnormf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// FINITEONLY: while.body.i:
-// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnormf(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*]]:
+// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// FINITEONLY: [[WHILE_BODY_I]]:
+// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]]
// FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]]
// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
-// FINITEONLY: _ZL6rnormfiPKf.exit:
-// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// FINITEONLY: [[_ZL6RNORMFIPKF_EXIT]]:
+// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rnormf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// APPROX: while.body.i:
-// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// APPROX-LABEL: define dso_local noundef float @test_rnormf(
+// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*]]:
+// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// APPROX: [[WHILE_BODY_I]]:
+// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
-// APPROX: _ZL6rnormfiPKf.exit:
-// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// APPROX: [[_ZL6RNORMFIPKF_EXIT]]:
+// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnormf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// NCRDIV: while.body.i:
-// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// NCRDIV-LABEL: define dso_local noundef float @test_rnormf(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*]]:
+// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// NCRDIV: [[WHILE_BODY_I]]:
+// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// NCRDIV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
// NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
-// NCRDIV: _ZL6rnormfiPKf.exit:
-// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// NCRDIV: [[_ZL6RNORMFIPKF_EXIT]]:
+// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnormf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// AMDGCNSPIRV: while.body.i:
-// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnormf(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4
// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
-// AMDGCNSPIRV: _ZL6rnormfiPKf.exit:
-// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL6RNORMFIPKF_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
@@ -5689,103 +6373,108 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
return rnormf(x, y);
}
-// DEFAULT-LABEL: @test_rnorm(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// DEFAULT: while.body.i:
-// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// DEFAULT-LABEL: define dso_local noundef double @test_rnorm(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*]]:
+// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// DEFAULT: [[WHILE_BODY_I]]:
+// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
-// DEFAULT: _ZL5rnormiPKd.exit:
-// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// DEFAULT: [[_ZL5RNORMIPKD_EXIT]]:
+// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnorm(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// FINITEONLY: while.body.i:
-// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*]]:
+// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// FINITEONLY: [[WHILE_BODY_I]]:
+// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]]
// FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]]
// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
-// FINITEONLY: _ZL5rnormiPKd.exit:
-// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// FINITEONLY: [[_ZL5RNORMIPKD_EXIT]]:
+// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rnorm(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// APPROX: while.body.i:
-// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// APPROX-LABEL: define dso_local noundef double @test_rnorm(
+// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*]]:
+// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// APPROX: [[WHILE_BODY_I]]:
+// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
+// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]]
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
-// APPROX: _ZL5rnormiPKd.exit:
-// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// APPROX: [[_ZL5RNORMIPKD_EXIT]]:
+// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnorm(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// NCRDIV: while.body.i:
-// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// NCRDIV-LABEL: define dso_local noundef double @test_rnorm(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*]]:
+// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// NCRDIV: [[WHILE_BODY_I]]:
+// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]]
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// NCRDIV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
// NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
-// NCRDIV: _ZL5rnormiPKd.exit:
-// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// NCRDIV: [[_ZL5RNORMIPKD_EXIT]]:
+// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnorm(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
-// AMDGCNSPIRV: while.body.i:
-// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
+// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]]
+// AMDGCNSPIRV: [[WHILE_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ]
+// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ]
// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8
// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
-// AMDGCNSPIRV: _ZL5rnormiPKd.exit:
-// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL5RNORMIPKD_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ]
// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
@@ -5793,383 +6482,466 @@ extern "C" __device__ double test_rnorm(int x, const double* y) {
return rnorm(x, y);
}
-// DEFAULT-LABEL: @test_rnorm3df(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_rnorm3df(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnorm3df(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnorm3df(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rnorm3df(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_rnorm3df(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnorm3df(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_rnorm3df(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnorm3df(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnorm3df(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_rnorm3df(float x, float y, float z) {
return rnorm3df(x, y, z);
}
-// DEFAULT-LABEL: @test_rnorm3d(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_rnorm3d(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnorm3d(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm3d(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rnorm3d(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_rnorm3d(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnorm3d(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_rnorm3d(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnorm3d(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm3d(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_rnorm3d(double x, double y, double z) {
return rnorm3d(x, y, z);
}
-// DEFAULT-LABEL: @test_rnorm4df(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test_rnorm4df(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnorm4df(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnorm4df(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rnorm4df(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test_rnorm4df(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnorm4df(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test_rnorm4df(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnorm4df(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnorm4df(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_rnorm4df(float x, float y, float z, float w) {
return rnorm4df(x, y, z, w);
}
-// DEFAULT-LABEL: @test_rnorm4d(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef double @test_rnorm4d(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rnorm4d(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm4d(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rnorm4d(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef double @test_rnorm4d(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rnorm4d(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef double @test_rnorm4d(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rnorm4d(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm4d(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w) {
return rnorm4d(x, y, z, w);
}
-// DEFAULT-LABEL: @test_roundf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_roundf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_roundf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_roundf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.round.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_roundf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_roundf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_roundf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_roundf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_roundf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_roundf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_roundf(float x) {
return roundf(x);
}
-// DEFAULT-LABEL: @test_round(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_round(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_round(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_round(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.round.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_round(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_round(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_round(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_round(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_round(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_round(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_round(double x) {
return round(x);
}
-// DEFAULT-LABEL: @test_rsqrtf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_rsqrtf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rsqrtf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rsqrtf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_rsqrtf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_rsqrtf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rsqrtf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_rsqrtf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rsqrtf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rsqrtf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_rsqrtf(float x) {
return rsqrtf(x);
}
-// DEFAULT-LABEL: @test_rsqrt(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_rsqrt(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_rsqrt(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rsqrt(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_rsqrt(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_rsqrt(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_rsqrt(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_rsqrt(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_rsqrt(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rsqrt(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_rsqrt(double x) {
return rsqrt(x);
}
-// DEFAULT-LABEL: @test_scalblnf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-LABEL: define dso_local noundef float @test_scalblnf(
+// DEFAULT-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_scalblnf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_scalblnf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[CONV_I]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_scalblnf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-LABEL: define dso_local noundef float @test_scalblnf(
+// APPROX-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_scalblnf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// NCRDIV-LABEL: define dso_local noundef float @test_scalblnf(
+// NCRDIV-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_scalblnf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_scalblnf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_scalblnf(float x, long int y) {
return scalblnf(x, y);
}
-// DEFAULT-LABEL: @test_scalbln(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-LABEL: define dso_local noundef double @test_scalbln(
+// DEFAULT-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_scalbln(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_scalbln(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[CONV_I]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_scalbln(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-LABEL: define dso_local noundef double @test_scalbln(
+// APPROX-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_scalbln(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// NCRDIV-LABEL: define dso_local noundef double @test_scalbln(
+// NCRDIV-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_scalbln(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_scalbln(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648)
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_scalbln(double x, long int y) {
return scalbln(x, y);
}
-// DEFAULT-LABEL: @test_scalbnf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_scalbnf(
+// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_scalbnf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_scalbnf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_scalbnf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_scalbnf(
+// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_scalbnf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_scalbnf(
+// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_scalbnf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_scalbnf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_scalbnf(float x, int y) {
return scalbnf(x, y);
}
-// DEFAULT-LABEL: @test_scalbn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_scalbn(
+// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_scalbn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_scalbn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_scalbn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_scalbn(
+// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_scalbn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_scalbn(
+// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_scalbn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_scalbn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_scalbn(double x, int y) {
return scalbn(x, y);
}
-// CHECK-LABEL: @test___signbitf(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32
-// CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
-// CHECK-NEXT: ret i32 [[DOTLOBIT]]
-//
-// AMDGCNSPIRV-LABEL: @test___signbitf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
+// DEFAULT-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
+// DEFAULT-NEXT: ret i32 [[DOTLOBIT]]
+//
+// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
+// FINITEONLY-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
+// FINITEONLY-NEXT: ret i32 [[DOTLOBIT]]
+//
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
+// APPROX-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
+// APPROX-NEXT: ret i32 [[DOTLOBIT]]
+//
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
+// NCRDIV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
+// NCRDIV-NEXT: ret i32 [[DOTLOBIT]]
+//
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___signbitf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31
// AMDGCNSPIRV-NEXT: ret i32 [[DOTLOBIT]]
//
@@ -6177,16 +6949,42 @@ extern "C" __device__ BOOL_TYPE test___signbitf(float x) {
return __signbitf(x);
}
-// CHECK-LABEL: @test___signbit(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64
-// CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
-// CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
-// CHECK-NEXT: ret i32 [[CONV]]
+// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
+// DEFAULT-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
+// DEFAULT-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
+// DEFAULT-NEXT: ret i32 [[CONV]]
+//
+// FINITEONLY-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
+// FINITEONLY-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
+// FINITEONLY-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
+// FINITEONLY-NEXT: ret i32 [[CONV]]
+//
+// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
+// APPROX-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
+// APPROX-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
+// APPROX-NEXT: ret i32 [[CONV]]
+//
+// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
+// NCRDIV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
+// NCRDIV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
+// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: @test___signbit(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64
+// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___signbit(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
// AMDGCNSPIRV-NEXT: ret i32 [[CONV]]
@@ -6195,59 +6993,64 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) {
return __signbit(x);
}
-// DEFAULT-LABEL: @test_sincosf(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local void @test_sincosf(
+// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret void
//
-// FINITEONLY-LABEL: @test_sincosf(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local void @test_sincosf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret void
//
-// APPROX-LABEL: @test_sincosf(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local void @test_sincosf(
+// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret void
//
-// NCRDIV-LABEL: @test_sincosf(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local void @test_sincosf(
+// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]]
+// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret void
//
-// AMDGCNSPIRV-LABEL: @test_sincosf(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func void @test_sincosf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]]
-// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17]]
+// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret void
//
@@ -6255,59 +7058,64 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) {
sincosf(x, y, z);
}
-// DEFAULT-LABEL: @test_sincos(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local void @test_sincos(
+// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret void
//
-// FINITEONLY-LABEL: @test_sincos(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local void @test_sincos(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret void
//
-// APPROX-LABEL: @test_sincos(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local void @test_sincos(
+// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// APPROX-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret void
//
-// NCRDIV-LABEL: @test_sincos(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local void @test_sincos(
+// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret void
//
-// AMDGCNSPIRV-LABEL: @test_sincos(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func void @test_sincos(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]]
-// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret void
//
@@ -6315,59 +7123,64 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) {
sincos(x, y, z);
}
-// DEFAULT-LABEL: @test_sincospif(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local void @test_sincospif(
+// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret void
//
-// FINITEONLY-LABEL: @test_sincospif(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local void @test_sincospif(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret void
//
-// APPROX-LABEL: @test_sincospif(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local void @test_sincospif(
+// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]]
+// APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret void
//
-// NCRDIV-LABEL: @test_sincospif(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local void @test_sincospif(
+// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]]
+// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret void
//
-// AMDGCNSPIRV-LABEL: @test_sincospif(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func void @test_sincospif(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]]
-// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17]]
+// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret void
//
@@ -6375,59 +7188,64 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) {
sincospif(x, y, z);
}
-// DEFAULT-LABEL: @test_sincospi(
-// DEFAULT-NEXT: entry:
+// DEFAULT-LABEL: define dso_local void @test_sincospi(
+// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// DEFAULT-NEXT: ret void
//
-// FINITEONLY-LABEL: @test_sincospi(
-// FINITEONLY-NEXT: entry:
+// FINITEONLY-LABEL: define dso_local void @test_sincospi(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// FINITEONLY-NEXT: ret void
//
-// APPROX-LABEL: @test_sincospi(
-// APPROX-NEXT: entry:
+// APPROX-LABEL: define dso_local void @test_sincospi(
+// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// APPROX-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
+// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]]
+// APPROX-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]]
// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// APPROX-NEXT: ret void
//
-// NCRDIV-LABEL: @test_sincospi(
-// NCRDIV-NEXT: entry:
+// NCRDIV-LABEL: define dso_local void @test_sincospi(
+// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
-// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
-// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
+// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]]
// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
// NCRDIV-NEXT: ret void
//
-// AMDGCNSPIRV-LABEL: @test_sincospi(
-// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-LABEL: define spir_func void @test_sincospi(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8
// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]]
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]]
-// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19]]
+// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]]
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]]
// AMDGCNSPIRV-NEXT: ret void
//
@@ -6435,549 +7253,640 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) {
sincospi(x, y, z);
}
-// DEFAULT-LABEL: @test_sinf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_sinf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_sinf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_sinf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_sinf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I1]]
//
-// NCRDIV-LABEL: @test_sinf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_sinf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_sinf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_sinf(float x) {
return sinf(x);
}
-// DEFAULT-LABEL: @test_sin(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_sin(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_sin(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sin(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_sin(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_sin(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_sin(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_sin(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_sin(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sin(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_sin(double x) {
return sin(x);
}
-// DEFAULT-LABEL: @test_sinpif(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_sinpif(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_sinpif(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinpif(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_sinpif(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_sinpif(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_sinpif(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_sinpif(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_sinpif(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinpif(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_sinpif(float x) {
return sinpif(x);
}
-// DEFAULT-LABEL: @test_sinpi(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_sinpi(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_sinpi(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sinpi(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_sinpi(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_sinpi(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_sinpi(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_sinpi(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_sinpi(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sinpi(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_sinpi(double x) {
return sinpi(x);
}
-// DEFAULT-LABEL: @test_sqrtf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_sqrtf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_sqrtf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sqrtf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_sqrtf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_sqrtf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_sqrtf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META25:![0-9]+]]
+// NCRDIV-LABEL: define dso_local noundef float @test_sqrtf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]]), !fpmath [[META25:![0-9]+]]
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_sqrtf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sqrtf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_sqrtf(float x) {
return sqrtf(x);
}
-// DEFAULT-LABEL: @test_sqrt(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_sqrt(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_sqrt(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sqrt(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_sqrt(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_sqrt(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_sqrt(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_sqrt(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_sqrt(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sqrt(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_sqrt(double x) {
return sqrt(x);
}
-// DEFAULT-LABEL: @test_tanf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_tanf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tanf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_tanf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_tanf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tanf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_tanf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tanf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_tanf(float x) {
return tanf(x);
}
-// DEFAULT-LABEL: @test_tan(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_tan(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tan(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tan(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_tan(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_tan(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tan(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_tan(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tan(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tan(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_tan(double x) {
return tan(x);
}
-// DEFAULT-LABEL: @test_tanhf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test_tanhf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tanhf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanhf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_tanhf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test_tanhf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tanhf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test_tanhf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tanhf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanhf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_tanhf(float x) {
return tanhf(x);
}
-// DEFAULT-LABEL: @test_tanh(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef double @test_tanh(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tanh(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tanh(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_tanh(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef double @test_tanh(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tanh(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef double @test_tanh(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tanh(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tanh(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_tanh(double x) {
return tanh(x);
}
-// DEFAULT-LABEL: @test_tgammaf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_tgammaf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tgammaf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tgammaf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_tgammaf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_tgammaf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tgammaf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_tgammaf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tgammaf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tgammaf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_tgammaf(float x) {
return tgammaf(x);
}
-// DEFAULT-LABEL: @test_tgamma(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_tgamma(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_tgamma(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tgamma(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_tgamma(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_tgamma(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_tgamma(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_tgamma(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_tgamma(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tgamma(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_tgamma(double x) {
return tgamma(x);
}
-// DEFAULT-LABEL: @test_truncf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_truncf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_truncf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.trunc.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_truncf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.trunc.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_truncf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_truncf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_truncf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_truncf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_truncf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_truncf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_truncf(float x) {
return truncf(x);
}
-// DEFAULT-LABEL: @test_trunc(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_trunc(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_trunc(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.trunc.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_trunc(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.trunc.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_trunc(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_trunc(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_trunc(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_trunc(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_trunc(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_trunc(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_trunc(double x) {
return trunc(x);
}
-// DEFAULT-LABEL: @test_y0f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_y0f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_y0f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y0f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_y0f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_y0f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_y0f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_y0f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_y0f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y0f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_y0f(float x) {
return y0f(x);
}
-// DEFAULT-LABEL: @test_y0(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_y0(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_y0(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y0(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_y0(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_y0(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_y0(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_y0(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_y0(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y0(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_y0(double x) {
return y0(x);
}
-// DEFAULT-LABEL: @test_y1f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test_y1f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_y1f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y1f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test_y1f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test_y1f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test_y1f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test_y1f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_y1f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y1f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test_y1f(float x) {
return y1f(x);
}
-// DEFAULT-LABEL: @test_y1(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef double @test_y1(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret double [[CALL_I]]
//
-// FINITEONLY-LABEL: @test_y1(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y1(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret double [[CALL_I]]
//
-// APPROX-LABEL: @test_y1(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef double @test_y1(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret double [[CALL_I]]
//
-// NCRDIV-LABEL: @test_y1(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef double @test_y1(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret double [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test_y1(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y1(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret double [[CALL_I]]
//
extern "C" __device__ double test_y1(double x) {
return y1(x);
}
-// DEFAULT-LABEL: @test_ynf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// DEFAULT-LABEL: define dso_local float @test_ynf(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// DEFAULT-NEXT: ]
-// DEFAULT: if.then.i:
-// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]]
-// DEFAULT: if.then2.i:
+// DEFAULT: [[IF_THEN_I]]:
+// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]]
+// DEFAULT: [[IF_THEN2_I]]:
// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL3YNFIF_EXIT]]
-// DEFAULT: if.end4.i:
+// DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT]]
+// DEFAULT: [[IF_END4_I]]:
// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
-// DEFAULT: for.body.i:
-// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]]
+// DEFAULT: [[FOR_BODY_I]]:
+// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -6985,32 +7894,33 @@ extern "C" __device__ double test_y1(double x) {
// DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
-// DEFAULT: _ZL3ynfif.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// DEFAULT: [[_ZL3YNFIF_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// DEFAULT-NEXT: ret float [[RETVAL_0_I]]
//
-// FINITEONLY-LABEL: @test_ynf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_ynf(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// FINITEONLY-NEXT: ]
-// FINITEONLY: if.then.i:
-// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]]
-// FINITEONLY: if.then2.i:
+// FINITEONLY: [[IF_THEN_I]]:
+// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]]
+// FINITEONLY: [[IF_THEN2_I]]:
// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL3YNFIF_EXIT]]
-// FINITEONLY: if.end4.i:
+// FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT]]
+// FINITEONLY: [[IF_END4_I]]:
// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
-// FINITEONLY: for.body.i:
-// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]]
+// FINITEONLY: [[FOR_BODY_I]]:
+// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
@@ -7018,32 +7928,33 @@ extern "C" __device__ double test_y1(double x) {
// FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
// FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
-// FINITEONLY: _ZL3ynfif.exit:
-// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// FINITEONLY: [[_ZL3YNFIF_EXIT]]:
+// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// FINITEONLY-NEXT: ret float [[RETVAL_0_I]]
//
-// APPROX-LABEL: @test_ynf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// APPROX-LABEL: define dso_local float @test_ynf(
+// APPROX-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// APPROX-NEXT: ]
-// APPROX: if.then.i:
-// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]]
-// APPROX: if.then2.i:
+// APPROX: [[IF_THEN_I]]:
+// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]]
+// APPROX: [[IF_THEN2_I]]:
// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL3YNFIF_EXIT]]
-// APPROX: if.end4.i:
+// APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT]]
+// APPROX: [[IF_END4_I]]:
// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
-// APPROX: for.body.i:
-// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]]
+// APPROX: [[FOR_BODY_I]]:
+// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -7051,32 +7962,33 @@ extern "C" __device__ double test_y1(double x) {
// APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
-// APPROX: _ZL3ynfif.exit:
-// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// APPROX: [[_ZL3YNFIF_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// APPROX-NEXT: ret float [[RETVAL_0_I]]
//
-// NCRDIV-LABEL: @test_ynf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// NCRDIV-LABEL: define dso_local float @test_ynf(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// NCRDIV-NEXT: ]
-// NCRDIV: if.then.i:
-// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]]
-// NCRDIV: if.then2.i:
+// NCRDIV: [[IF_THEN_I]]:
+// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]]
+// NCRDIV: [[IF_THEN2_I]]:
// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL3YNFIF_EXIT]]
-// NCRDIV: if.end4.i:
+// NCRDIV-NEXT: br label %[[_ZL3YNFIF_EXIT]]
+// NCRDIV: [[IF_END4_I]]:
// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
-// NCRDIV: for.body.i:
-// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]]
+// NCRDIV: [[FOR_BODY_I]]:
+// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]]
@@ -7084,32 +7996,33 @@ extern "C" __device__ double test_y1(double x) {
// NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]]
-// NCRDIV: _ZL3ynfif.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]]
+// NCRDIV: [[_ZL3YNFIF_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// NCRDIV-NEXT: ret float [[RETVAL_0_I]]
//
-// AMDGCNSPIRV-LABEL: @test_ynf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func float @test_ynf(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]]
-// AMDGCNSPIRV: if.then2.i:
+// AMDGCNSPIRV: [[IF_THEN_I]]:
+// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]]
+// AMDGCNSPIRV: [[IF_THEN2_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT]]
-// AMDGCNSPIRV: if.end4.i:
+// AMDGCNSPIRV-NEXT: br label %[[_ZL3YNFIF_EXIT]]
+// AMDGCNSPIRV: [[IF_END4_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
-// AMDGCNSPIRV: for.body.i:
-// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]]
+// AMDGCNSPIRV: [[FOR_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
@@ -7117,36 +8030,37 @@ extern "C" __device__ double test_y1(double x) {
// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
-// AMDGCNSPIRV: _ZL3ynfif.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL3YNFIF_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]]
//
extern "C" __device__ float test_ynf(int x, float y) {
return ynf(x, y);
}
-// DEFAULT-LABEL: @test_yn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// DEFAULT-LABEL: define dso_local double @test_yn(
+// DEFAULT-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// DEFAULT-NEXT: ]
-// DEFAULT: if.then.i:
-// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL2YNID_EXIT:%.*]]
-// DEFAULT: if.then2.i:
+// DEFAULT: [[IF_THEN_I]]:
+// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT:.*]]
+// DEFAULT: [[IF_THEN2_I]]:
// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
-// DEFAULT-NEXT: br label [[_ZL2YNID_EXIT]]
-// DEFAULT: if.end4.i:
+// DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT]]
+// DEFAULT: [[IF_END4_I]]:
// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
-// DEFAULT: for.body.i:
-// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]]
+// DEFAULT: [[FOR_BODY_I]]:
+// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -7154,32 +8068,33 @@ extern "C" __device__ float test_ynf(int x, float y) {
// DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
-// DEFAULT: _ZL2ynid.exit:
-// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
+// DEFAULT: [[_ZL2YNID_EXIT]]:
+// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// DEFAULT-NEXT: ret double [[RETVAL_0_I]]
//
-// FINITEONLY-LABEL: @test_yn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_yn(
+// FINITEONLY-SAME: i32 noundef [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// FINITEONLY-NEXT: ]
-// FINITEONLY: if.then.i:
-// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL2YNID_EXIT:%.*]]
-// FINITEONLY: if.then2.i:
+// FINITEONLY: [[IF_THEN_I]]:
+// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT:.*]]
+// FINITEONLY: [[IF_THEN2_I]]:
// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
-// FINITEONLY-NEXT: br label [[_ZL2YNID_EXIT]]
-// FINITEONLY: if.end4.i:
+// FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT]]
+// FINITEONLY: [[IF_END4_I]]:
// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
-// FINITEONLY: for.body.i:
-// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]]
+// FINITEONLY: [[FOR_BODY_I]]:
+// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]]
@@ -7187,32 +8102,33 @@ extern "C" __device__ float test_ynf(int x, float y) {
// FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]]
// FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
-// FINITEONLY: _ZL2ynid.exit:
-// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
+// FINITEONLY: [[_ZL2YNID_EXIT]]:
+// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// FINITEONLY-NEXT: ret double [[RETVAL_0_I]]
//
-// APPROX-LABEL: @test_yn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// APPROX-LABEL: define dso_local double @test_yn(
+// APPROX-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// APPROX-NEXT: ]
-// APPROX: if.then.i:
-// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL2YNID_EXIT:%.*]]
-// APPROX: if.then2.i:
+// APPROX: [[IF_THEN_I]]:
+// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT: br label %[[_ZL2YNID_EXIT:.*]]
+// APPROX: [[IF_THEN2_I]]:
// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
-// APPROX-NEXT: br label [[_ZL2YNID_EXIT]]
-// APPROX: if.end4.i:
+// APPROX-NEXT: br label %[[_ZL2YNID_EXIT]]
+// APPROX: [[IF_END4_I]]:
// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
// APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
-// APPROX: for.body.i:
-// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]]
+// APPROX: [[FOR_BODY_I]]:
+// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -7220,32 +8136,33 @@ extern "C" __device__ float test_ynf(int x, float y) {
// APPROX-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
-// APPROX: _ZL2ynid.exit:
-// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
+// APPROX: [[_ZL2YNID_EXIT]]:
+// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// APPROX-NEXT: ret double [[RETVAL_0_I]]
//
-// NCRDIV-LABEL: @test_yn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// NCRDIV-LABEL: define dso_local double @test_yn(
+// NCRDIV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// NCRDIV-NEXT: ]
-// NCRDIV: if.then.i:
-// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL2YNID_EXIT:%.*]]
-// NCRDIV: if.then2.i:
+// NCRDIV: [[IF_THEN_I]]:
+// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT:.*]]
+// NCRDIV: [[IF_THEN2_I]]:
// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
-// NCRDIV-NEXT: br label [[_ZL2YNID_EXIT]]
-// NCRDIV: if.end4.i:
+// NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT]]
+// NCRDIV: [[IF_END4_I]]:
// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
-// NCRDIV: for.body.i:
-// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]]
+// NCRDIV: [[FOR_BODY_I]]:
+// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -7253,32 +8170,33 @@ extern "C" __device__ float test_ynf(int x, float y) {
// NCRDIV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP27:![0-9]+]]
-// NCRDIV: _ZL2ynid.exit:
-// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP27:![0-9]+]]
+// NCRDIV: [[_ZL2YNID_EXIT]]:
+// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// NCRDIV-NEXT: ret double [[RETVAL_0_I]]
//
-// AMDGCNSPIRV-LABEL: @test_yn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [
-// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]]
-// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func double @test_yn(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [
+// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]]
+// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]]
// AMDGCNSPIRV-NEXT: ]
-// AMDGCNSPIRV: if.then.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT:%.*]]
-// AMDGCNSPIRV: if.then2.i:
+// AMDGCNSPIRV: [[IF_THEN_I]]:
+// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: br label %[[_ZL2YNID_EXIT:.*]]
+// AMDGCNSPIRV: [[IF_THEN2_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT]]
-// AMDGCNSPIRV: if.end4.i:
+// AMDGCNSPIRV-NEXT: br label %[[_ZL2YNID_EXIT]]
+// AMDGCNSPIRV: [[IF_END4_I]]:
// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
-// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
-// AMDGCNSPIRV: for.body.i:
-// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
-// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]]
+// AMDGCNSPIRV: [[FOR_BODY_I]]:
+// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ]
+// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ]
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
@@ -7286,71 +8204,81 @@ extern "C" __device__ float test_ynf(int x, float y) {
// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1
// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
-// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]]
-// AMDGCNSPIRV: _ZL2ynid.exit:
-// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]]
+// AMDGCNSPIRV: [[_ZL2YNID_EXIT]]:
+// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ]
// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]]
//
extern "C" __device__ double test_yn(int x, double y) {
return yn(x, y);
}
-// DEFAULT-LABEL: @test___cosf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test___cosf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test___cosf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___cosf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test___cosf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test___cosf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test___cosf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test___cosf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___cosf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___cosf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test___cosf(float x) {
return __cosf(x);
}
-// DEFAULT-LABEL: @test___exp10f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000
+// DEFAULT-LABEL: define dso_local noundef float @test___exp10f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___exp10f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], 0x400A934F00000000
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___exp10f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], 0x400A934F00000000
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___exp10f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000
+// APPROX-LABEL: define dso_local noundef float @test___exp10f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___exp10f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000
+// NCRDIV-LABEL: define dso_local noundef float @test___exp10f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___exp10f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___exp10f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
@@ -7358,33 +8286,38 @@ extern "C" __device__ float test___exp10f(float x) {
return __exp10f(x);
}
-// DEFAULT-LABEL: @test___expf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000
+// DEFAULT-LABEL: define dso_local noundef float @test___expf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___expf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], 0x3FF7154760000000
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___expf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], 0x3FF7154760000000
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___expf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000
+// APPROX-LABEL: define dso_local noundef float @test___expf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___expf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000
+// NCRDIV-LABEL: define dso_local noundef float @test___expf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___expf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___expf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
@@ -7392,389 +8325,454 @@ extern "C" __device__ float test___expf(float x) {
return __expf(x);
}
-// DEFAULT-LABEL: @test___fadd_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test___fadd_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]]
// DEFAULT-NEXT: ret float [[ADD_I]]
//
-// FINITEONLY-LABEL: @test___fadd_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract float [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fadd_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract float [[X]], [[Y]]
// FINITEONLY-NEXT: ret float [[ADD_I]]
//
-// APPROX-LABEL: @test___fadd_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test___fadd_rn(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]]
// APPROX-NEXT: ret float [[ADD_I]]
//
-// NCRDIV-LABEL: @test___fadd_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef float @test___fadd_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]]
// NCRDIV-NEXT: ret float [[ADD_I]]
//
-// AMDGCNSPIRV-LABEL: @test___fadd_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fadd_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret float [[ADD_I]]
//
extern "C" __device__ float test___fadd_rn(float x, float y) {
return __fadd_rn(x, y);
}
-// DEFAULT-LABEL: @test___fdividef(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test___fdividef(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// DEFAULT-NEXT: ret float [[DIV_I]]
//
-// FINITEONLY-LABEL: @test___fdividef(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fdividef(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X]], [[Y]]
// FINITEONLY-NEXT: ret float [[DIV_I]]
//
-// APPROX-LABEL: @test___fdividef(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test___fdividef(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// APPROX-NEXT: ret float [[DIV_I]]
//
-// NCRDIV-LABEL: @test___fdividef(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]], !fpmath [[META12]]
+// NCRDIV-LABEL: define dso_local noundef float @test___fdividef(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12]]
// NCRDIV-NEXT: ret float [[DIV_I]]
//
-// AMDGCNSPIRV-LABEL: @test___fdividef(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fdividef(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret float [[DIV_I]]
//
extern "C" __device__ float test___fdividef(float x, float y) {
return __fdividef(x, y);
}
-// DEFAULT-LABEL: @test__fmaf_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test__fmaf_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test__fmaf_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]], float nofpclass(nan inf) [[Z:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test__fmaf_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]], float nofpclass(nan inf) [[Z]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test__fmaf_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test__fmaf_rn(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test__fmaf_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test__fmaf_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test__fmaf_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test__fmaf_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test__fmaf_rn(float x, float y, float z) {
return __fmaf_rn(x, y, z);
}
-// DEFAULT-LABEL: @test___fmul_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test___fmul_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]]
// DEFAULT-NEXT: ret float [[MUL_I]]
//
-// FINITEONLY-LABEL: @test___fmul_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fmul_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], [[Y]]
// FINITEONLY-NEXT: ret float [[MUL_I]]
//
-// APPROX-LABEL: @test___fmul_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test___fmul_rn(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]]
// APPROX-NEXT: ret float [[MUL_I]]
//
-// NCRDIV-LABEL: @test___fmul_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef float @test___fmul_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]]
// NCRDIV-NEXT: ret float [[MUL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___fmul_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fmul_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret float [[MUL_I]]
//
extern "C" __device__ float test___fmul_rn(float x, float y) {
return __fmul_rn(x, y);
}
-// DEFAULT-LABEL: @test___frcp_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test___frcp_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]]
// DEFAULT-NEXT: ret float [[DIV_I]]
//
-// FINITEONLY-LABEL: @test___frcp_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float 1.000000e+00, [[X:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___frcp_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float 1.000000e+00, [[X]]
// FINITEONLY-NEXT: ret float [[DIV_I]]
//
-// APPROX-LABEL: @test___frcp_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test___frcp_rn(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]]
// APPROX-NEXT: ret float [[DIV_I]]
//
-// NCRDIV-LABEL: @test___frcp_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]], !fpmath [[META12]]
+// NCRDIV-LABEL: define dso_local noundef float @test___frcp_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]], !fpmath [[META12]]
// NCRDIV-NEXT: ret float [[DIV_I]]
//
-// AMDGCNSPIRV-LABEL: @test___frcp_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___frcp_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]]
// AMDGCNSPIRV-NEXT: ret float [[DIV_I]]
//
extern "C" __device__ float test___frcp_rn(float x) {
return __frcp_rn(x);
}
-// DEFAULT-LABEL: @test___frsqrt_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test___frsqrt_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___frsqrt_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.rsq.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___frsqrt_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.rsq.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___frsqrt_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test___frsqrt_rn(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___frsqrt_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test___frsqrt_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___frsqrt_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___frsqrt_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test___frsqrt_rn(float x) {
return __frsqrt_rn(x);
}
-// DEFAULT-LABEL: @test___fsqrt_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-LABEL: define dso_local noundef float @test___fsqrt_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test___fsqrt_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___fsqrt_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test___fsqrt_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-LABEL: define dso_local noundef float @test___fsqrt_rn(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test___fsqrt_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-LABEL: define dso_local noundef float @test___fsqrt_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___fsqrt_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fsqrt_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR12]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test___fsqrt_rn(float x) {
return __fsqrt_rn(x);
}
-// DEFAULT-LABEL: @test___fsub_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef float @test___fsub_rn(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]]
// DEFAULT-NEXT: ret float [[SUB_I]]
//
-// FINITEONLY-LABEL: @test___fsub_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[SUB_I:%.*]] = fsub nnan ninf contract float [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fsub_rn(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[SUB_I:%.*]] = fsub nnan ninf contract float [[X]], [[Y]]
// FINITEONLY-NEXT: ret float [[SUB_I]]
//
-// APPROX-LABEL: @test___fsub_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef float @test___fsub_rn(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]]
// APPROX-NEXT: ret float [[SUB_I]]
//
-// NCRDIV-LABEL: @test___fsub_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef float @test___fsub_rn(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]]
// NCRDIV-NEXT: ret float [[SUB_I]]
//
-// AMDGCNSPIRV-LABEL: @test___fsub_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fsub_rn(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret float [[SUB_I]]
//
extern "C" __device__ float test___fsub_rn(float x, float y) {
return __fsub_rn(x, y);
}
-// DEFAULT-LABEL: @test___log10f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test___log10f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___log10f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___log10f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___log10f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test___log10f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___log10f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test___log10f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___log10f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___log10f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test___log10f(float x) {
return __log10f(x);
}
-// DEFAULT-LABEL: @test___log2f(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test___log2f(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___log2f(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.log.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___log2f(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.log.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___log2f(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test___log2f(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___log2f(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test___log2f(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___log2f(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___log2f(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test___log2f(float x) {
return __log2f(x);
}
-// DEFAULT-LABEL: @test___logf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test___logf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test___logf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___logf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test___logf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test___logf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test___logf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test___logf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___logf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___logf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test___logf(float x) {
return __logf(x);
}
-// DEFAULT-LABEL: @test___powf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// DEFAULT-LABEL: define dso_local noundef float @test___powf(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test___powf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___powf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test___powf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// APPROX-LABEL: define dso_local noundef float @test___powf(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test___powf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
+// NCRDIV-LABEL: define dso_local noundef float @test___powf(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___powf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___powf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test___powf(float x, float y) {
return __powf(x, y);
}
-// DEFAULT-LABEL: @test___saturatef(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00
+// DEFAULT-LABEL: define dso_local noundef float @test___saturatef(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00
// DEFAULT-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00
// DEFAULT-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]]
// DEFAULT-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]]
// DEFAULT-NEXT: ret float [[COND5_I]]
//
-// FINITEONLY-LABEL: @test___saturatef(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CMP_I:%.*]] = fcmp nnan ninf contract olt float [[X:%.*]], 0.000000e+00
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___saturatef(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CMP_I:%.*]] = fcmp nnan ninf contract olt float [[X]], 0.000000e+00
// FINITEONLY-NEXT: [[CMP1_I:%.*]] = fcmp nnan ninf contract ogt float [[X]], 1.000000e+00
// FINITEONLY-NEXT: [[COND_I:%.*]] = select nnan ninf contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]]
// FINITEONLY-NEXT: [[COND5_I:%.*]] = select nnan ninf contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]]
// FINITEONLY-NEXT: ret float [[COND5_I]]
//
-// APPROX-LABEL: @test___saturatef(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00
+// APPROX-LABEL: define dso_local noundef float @test___saturatef(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00
// APPROX-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00
// APPROX-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]]
// APPROX-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]]
// APPROX-NEXT: ret float [[COND5_I]]
//
-// NCRDIV-LABEL: @test___saturatef(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00
+// NCRDIV-LABEL: define dso_local noundef float @test___saturatef(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00
// NCRDIV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00
// NCRDIV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]]
// NCRDIV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]]
// NCRDIV-NEXT: ret float [[COND5_I]]
//
-// AMDGCNSPIRV-LABEL: @test___saturatef(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___saturatef(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00
// AMDGCNSPIRV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00
// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]]
// AMDGCNSPIRV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]]
@@ -7784,114 +8782,129 @@ extern "C" __device__ float test___saturatef(float x) {
return __saturatef(x);
}
-// DEFAULT-LABEL: @test___sincosf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
-// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-LABEL: define dso_local void @test___sincosf(
+// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
+// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
-// DEFAULT-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// DEFAULT-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// DEFAULT-NEXT: ret void
//
-// FINITEONLY-LABEL: @test___sincosf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-LABEL: define dso_local void @test___sincosf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
+// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
-// FINITEONLY-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// FINITEONLY-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// FINITEONLY-NEXT: ret void
//
-// APPROX-LABEL: @test___sincosf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
-// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
+// APPROX-LABEL: define dso_local void @test___sincosf(
+// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
+// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
-// APPROX-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
+// APPROX-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]]
// APPROX-NEXT: ret void
//
-// NCRDIV-LABEL: @test___sincosf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
-// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-LABEL: define dso_local void @test___sincosf(
+// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
+// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
-// NCRDIV-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// NCRDIV-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// NCRDIV-NEXT: ret void
//
-// AMDGCNSPIRV-LABEL: @test___sincosf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-LABEL: define spir_func void @test___sincosf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]]
+// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: [[CALL1_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
-// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]]
+// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]]
// AMDGCNSPIRV-NEXT: ret void
//
extern "C" __device__ void test___sincosf(float x, float *y, float *z) {
__sincosf(x, y, z);
}
-// DEFAULT-LABEL: @test___sinf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local noundef float @test___sinf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: ret float [[CALL_I]]
//
-// FINITEONLY-LABEL: @test___sinf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___sinf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: ret float [[CALL_I]]
//
-// APPROX-LABEL: @test___sinf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local noundef float @test___sinf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: ret float [[CALL_I]]
//
-// NCRDIV-LABEL: @test___sinf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local noundef float @test___sinf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: ret float [[CALL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___sinf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___sinf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: ret float [[CALL_I]]
//
extern "C" __device__ float test___sinf(float x) {
return __sinf(x);
}
-// DEFAULT-LABEL: @test___tanf(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-LABEL: define dso_local float @test___tanf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
// DEFAULT-NEXT: ret float [[MUL_I]]
//
-// FINITEONLY-LABEL: @test___tanf(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___tanf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[CALL_I3_I]], [[TMP0]]
// FINITEONLY-NEXT: ret float [[MUL_I]]
//
-// APPROX-LABEL: @test___tanf(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-LABEL: define dso_local float @test___tanf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
// APPROX-NEXT: ret float [[MUL_I]]
//
-// NCRDIV-LABEL: @test___tanf(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-LABEL: define dso_local float @test___tanf(
+// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
// NCRDIV-NEXT: ret float [[MUL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___tanf(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// AMDGCNSPIRV-LABEL: define spir_func float @test___tanf(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
@@ -7901,319 +8914,491 @@ extern "C" __device__ float test___tanf(float x) {
return __tanf(x);
}
-// DEFAULT-LABEL: @test___dadd_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef double @test___dadd_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]]
// DEFAULT-NEXT: ret double [[ADD_I]]
//
-// FINITEONLY-LABEL: @test___dadd_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract double [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___dadd_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract double [[X]], [[Y]]
// FINITEONLY-NEXT: ret double [[ADD_I]]
//
-// APPROX-LABEL: @test___dadd_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef double @test___dadd_rn(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]]
// APPROX-NEXT: ret double [[ADD_I]]
//
-// NCRDIV-LABEL: @test___dadd_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef double @test___dadd_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]]
// NCRDIV-NEXT: ret double [[ADD_I]]
//
-// AMDGCNSPIRV-LABEL: @test___dadd_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dadd_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret double [[ADD_I]]
//
extern "C" __device__ double test___dadd_rn(double x, double y) {
return __dadd_rn(x, y);
}
-// DEFAULT-LABEL: @test___ddiv_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef double @test___ddiv_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]]
// DEFAULT-NEXT: ret double [[DIV_I]]
//
-// FINITEONLY-LABEL: @test___ddiv_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___ddiv_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[X]], [[Y]]
// FINITEONLY-NEXT: ret double [[DIV_I]]
//
-// APPROX-LABEL: @test___ddiv_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef double @test___ddiv_rn(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]]
// APPROX-NEXT: ret double [[DIV_I]]
//
-// NCRDIV-LABEL: @test___ddiv_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef double @test___ddiv_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]]
// NCRDIV-NEXT: ret double [[DIV_I]]
//
-// AMDGCNSPIRV-LABEL: @test___ddiv_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___ddiv_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret double [[DIV_I]]
//
extern "C" __device__ double test___ddiv_rn(double x, double y) {
return __ddiv_rn(x, y);
}
-// DEFAULT-LABEL: @test___dmul_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]]
+// DEFAULT-LABEL: define dso_local noundef double @test___dmul_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]]
// DEFAULT-NEXT: ret double [[MUL_I]]
//
-// FINITEONLY-LABEL: @test___dmul_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[X:%.*]], [[Y:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___dmul_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[X]], [[Y]]
// FINITEONLY-NEXT: ret double [[MUL_I]]
//
-// APPROX-LABEL: @test___dmul_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]]
+// APPROX-LABEL: define dso_local noundef double @test___dmul_rn(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]]
// APPROX-NEXT: ret double [[MUL_I]]
//
-// NCRDIV-LABEL: @test___dmul_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]]
+// NCRDIV-LABEL: define dso_local noundef double @test___dmul_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]]
// NCRDIV-NEXT: ret double [[MUL_I]]
//
-// AMDGCNSPIRV-LABEL: @test___dmul_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dmul_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]]
// AMDGCNSPIRV-NEXT: ret double [[MUL_I]]
//
extern "C" __device__ double test___dmul_rn(double x, double y) {
return __dmul_rn(x, y);
}
-// DEFAULT-LABEL: @test___drcp_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]]
+// DEFAULT-LABEL: define dso_local noundef double @test___drcp_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]]
// DEFAULT-NEXT: ret double [[DIV_I]]
//
-// FINITEONLY-LABEL: @test___drcp_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[X:%.*]]
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___drcp_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[X]]
// FINITEONLY-NEXT: ret double [[DIV_I]]
//
-// APPROX-LABEL: @test___drcp_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]]
+// APPROX-LABEL: define dso_local noundef double @test___drcp_rn(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]]
// APPROX-NEXT: ret double [[DIV_I]]
//
-// NCRDIV-LABEL: @test___drcp_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]]
+// NCRDIV-LABEL: define dso_local noundef double @test___drcp_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]]
// NCRDIV-NEXT: ret double [[DIV_I]]
//
-// AMDGCNSPIRV-LABEL: @test___drcp_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]]
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___drcp_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]]
// AMDGCNSPIRV-NEXT: ret double [[DIV_I]]
//
extern "C" __device__ double test___drcp_rn(double x) {
return __drcp_rn(x);
}
-// DEFAULT-LABEL: @test___dsqrt_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test___dsqrt_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test___dsqrt_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test___dsqrt_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test___dsqrt_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test___dsqrt_rn(
+// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test___dsqrt_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test___dsqrt_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test___dsqrt_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dsqrt_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test___dsqrt_rn(double x) {
return __dsqrt_rn(x);
}
-// DEFAULT-LABEL: @test__fma_rn(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test__fma_rn(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test__fma_rn(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test__fma_rn(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test__fma_rn(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test__fma_rn(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test__fma_rn(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test__fma_rn(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test__fma_rn(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test__fma_rn(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test__fma_rn(double x, double y, double z) {
return __fma_rn(x, y, z);
}
-// DEFAULT-LABEL: @test_float_min(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_float_min(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_float_min(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_float_min(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_float_min(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_float_min(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_float_min(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_float_min(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_float_min(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_float_min(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X]], float [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_float_min(float x, float y) {
return min(x, y);
}
-// DEFAULT-LABEL: @test_float_max(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef float @test_float_max(
+// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// DEFAULT-NEXT: ret float [[TMP0]]
//
-// FINITEONLY-LABEL: @test_float_max(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_float_max(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret float [[TMP0]]
//
-// APPROX-LABEL: @test_float_max(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef float @test_float_max(
+// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// APPROX-NEXT: ret float [[TMP0]]
//
-// NCRDIV-LABEL: @test_float_max(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef float @test_float_max(
+// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]])
// NCRDIV-NEXT: ret float [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_float_max(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_float_max(
+// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X]], float [[Y]])
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_float_max(float x, float y) {
return max(x, y);
}
-// DEFAULT-LABEL: @test_double_min(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_double_min(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_double_min(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_double_min(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_double_min(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_double_min(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_double_min(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_double_min(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_double_min(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_double_min(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X]], double [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_double_min(double x, double y) {
return min(x, y);
}
-// DEFAULT-LABEL: @test_double_max(
-// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// DEFAULT-LABEL: define dso_local noundef double @test_double_max(
+// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// DEFAULT-NEXT: ret double [[TMP0]]
//
-// FINITEONLY-LABEL: @test_double_max(
-// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]])
+// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_double_max(
+// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// FINITEONLY-NEXT: [[ENTRY:.*:]]
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]])
// FINITEONLY-NEXT: ret double [[TMP0]]
//
-// APPROX-LABEL: @test_double_max(
-// APPROX-NEXT: entry:
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// APPROX-LABEL: define dso_local noundef double @test_double_max(
+// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// APPROX-NEXT: [[ENTRY:.*:]]
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// APPROX-NEXT: ret double [[TMP0]]
//
-// NCRDIV-LABEL: @test_double_max(
-// NCRDIV-NEXT: entry:
-// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// NCRDIV-LABEL: define dso_local noundef double @test_double_max(
+// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// NCRDIV-NEXT: [[ENTRY:.*:]]
+// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]])
// NCRDIV-NEXT: ret double [[TMP0]]
//
-// AMDGCNSPIRV-LABEL: @test_double_max(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_double_max(
+// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X]], double [[Y]])
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_double_max(double x, double y) {
return max(x, y);
}
-// CHECK-LABEL: @test_int_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+// CHECK-LABEL: define dso_local noundef i32 @test_int_min(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X]], i32 [[Y]])
// CHECK-NEXT: ret i32 [[COND_I]]
//
-// AMDGCNSPIRV-LABEL: @test_int_min(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_int_min(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]]
//
extern "C" __device__ int test_int_min(int x, int y) {
return min(x, y);
}
-// CHECK-LABEL: @test_int_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+// CHECK-LABEL: define dso_local noundef i32 @test_int_max(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]])
// CHECK-NEXT: ret i32 [[COND_I]]
//
-// AMDGCNSPIRV-LABEL: @test_int_max(
-// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_int_max(
+// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]])
// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]]
//
extern "C" __device__ int test_int_max(int x, int y) {
return max(x, y);
}
+//.
+// DEFAULT: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// DEFAULT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// DEFAULT: [[META6]] = !{!"Simple C++ TBAA"}
+// DEFAULT: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
+// DEFAULT: [[META8]] = !{!"llvm.loop.mustprogress"}
+// DEFAULT: [[META9]] = !{!"llvm.loop.unroll.disable"}
+// DEFAULT: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+// DEFAULT: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// DEFAULT: [[META13]] = !{!"int", [[META5]], i64 0}
+// DEFAULT: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]}
+// DEFAULT: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// DEFAULT: [[META17]] = !{!"float", [[META5]], i64 0}
+// DEFAULT: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// DEFAULT: [[META19]] = !{!"double", [[META5]], i64 0}
+// DEFAULT: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]}
+// DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]}
+//.
+// FINITEONLY: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// FINITEONLY: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// FINITEONLY: [[META6]] = !{!"Simple C++ TBAA"}
+// FINITEONLY: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
+// FINITEONLY: [[META8]] = !{!"llvm.loop.mustprogress"}
+// FINITEONLY: [[META9]] = !{!"llvm.loop.unroll.disable"}
+// FINITEONLY: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+// FINITEONLY: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// FINITEONLY: [[META13]] = !{!"int", [[META5]], i64 0}
+// FINITEONLY: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]}
+// FINITEONLY: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// FINITEONLY: [[META17]] = !{!"float", [[META5]], i64 0}
+// FINITEONLY: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// FINITEONLY: [[META19]] = !{!"double", [[META5]], i64 0}
+// FINITEONLY: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]}
+// FINITEONLY: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]}
+//.
+// APPROX: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// APPROX: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// APPROX: [[META6]] = !{!"Simple C++ TBAA"}
+// APPROX: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
+// APPROX: [[META8]] = !{!"llvm.loop.mustprogress"}
+// APPROX: [[META9]] = !{!"llvm.loop.unroll.disable"}
+// APPROX: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
+// APPROX: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+// APPROX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// APPROX: [[META13]] = !{!"int", [[META5]], i64 0}
+// APPROX: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]}
+// APPROX: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]}
+// APPROX: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// APPROX: [[META17]] = !{!"float", [[META5]], i64 0}
+// APPROX: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// APPROX: [[META19]] = !{!"double", [[META5]], i64 0}
+// APPROX: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]}
+// APPROX: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]}
+// APPROX: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]}
+// APPROX: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]}
+// APPROX: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]}
+// APPROX: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]}
+//.
+// NCRDIV: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// NCRDIV: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// NCRDIV: [[META6]] = !{!"Simple C++ TBAA"}
+// NCRDIV: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
+// NCRDIV: [[META8]] = !{!"llvm.loop.mustprogress"}
+// NCRDIV: [[META9]] = !{!"llvm.loop.unroll.disable"}
+// NCRDIV: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+// NCRDIV: [[META12]] = !{float 2.500000e+00}
+// NCRDIV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
+// NCRDIV: [[META14]] = !{!"int", [[META5]], i64 0}
+// NCRDIV: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP16]] = distinct !{[[LOOP16]], [[META8]], [[META9]]}
+// NCRDIV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// NCRDIV: [[META18]] = !{!"float", [[META5]], i64 0}
+// NCRDIV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// NCRDIV: [[META20]] = !{!"double", [[META5]], i64 0}
+// NCRDIV: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]}
+// NCRDIV: [[META25]] = !{float 3.000000e+00}
+// NCRDIV: [[LOOP26]] = distinct !{[[LOOP26]], [[META8]], [[META9]]}
+// NCRDIV: [[LOOP27]] = distinct !{[[LOOP27]], [[META8]], [[META9]]}
+//.
+// AMDGCNSPIRV: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// AMDGCNSPIRV: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// AMDGCNSPIRV: [[META7]] = !{!"Simple C++ TBAA"}
+// AMDGCNSPIRV: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]}
+// AMDGCNSPIRV: [[META9]] = !{!"llvm.loop.mustprogress"}
+// AMDGCNSPIRV: [[META10]] = !{!"llvm.loop.unroll.disable"}
+// AMDGCNSPIRV: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
+// AMDGCNSPIRV: [[META14]] = !{!"int", [[META6]], i64 0}
+// AMDGCNSPIRV: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP16]] = distinct !{[[LOOP16]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// AMDGCNSPIRV: [[META18]] = !{!"float", [[META6]], i64 0}
+// AMDGCNSPIRV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
+// AMDGCNSPIRV: [[META20]] = !{!"double", [[META6]], i64 0}
+// AMDGCNSPIRV: [[LOOP21]] = distinct !{[[LOOP21]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP22]] = distinct !{[[LOOP22]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP23]] = distinct !{[[LOOP23]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP24]] = distinct !{[[LOOP24]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP25]] = distinct !{[[LOOP25]], [[META9]], [[META10]]}
+// AMDGCNSPIRV: [[LOOP26]] = distinct !{[[LOOP26]], [[META9]], [[META10]]}
+//.
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index d277562..7f427ca 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// REQUIRES: webassembly-registered-target, asserts
// FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline.
@@ -7,18 +7,20 @@
#include <wasm_simd128.h>
-// CHECK-LABEL: @test_v128_load(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2:![0-9]+]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_v128_load(const void *mem) {
return wasm_v128_load(mem);
}
-// CHECK-LABEL: @test_v128_load8_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_splat(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
@@ -28,9 +30,10 @@ v128_t test_v128_load8_splat(const void *mem) {
return wasm_v128_load8_splat(mem);
}
-// CHECK-LABEL: @test_v128_load16_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_splat(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
@@ -40,9 +43,10 @@ v128_t test_v128_load16_splat(const void *mem) {
return wasm_v128_load16_splat(mem);
}
-// CHECK-LABEL: @test_v128_load32_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_splat(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
@@ -51,9 +55,10 @@ v128_t test_v128_load32_splat(const void *mem) {
return wasm_v128_load32_splat(mem);
}
-// CHECK-LABEL: @test_v128_load64_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_splat(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
@@ -63,9 +68,10 @@ v128_t test_v128_load64_splat(const void *mem) {
return wasm_v128_load64_splat(mem);
}
-// CHECK-LABEL: @test_i16x8_load8x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_load8x8(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -74,9 +80,10 @@ v128_t test_i16x8_load8x8(const void *mem) {
return wasm_i16x8_load8x8(mem);
}
-// CHECK-LABEL: @test_u16x8_load8x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_load8x8(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -85,9 +92,10 @@ v128_t test_u16x8_load8x8(const void *mem) {
return wasm_u16x8_load8x8(mem);
}
-// CHECK-LABEL: @test_i32x4_load16x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_load16x4(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
@@ -95,9 +103,10 @@ v128_t test_i32x4_load16x4(const void *mem) {
return wasm_i32x4_load16x4(mem);
}
-// CHECK-LABEL: @test_u32x4_load16x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_load16x4(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
@@ -105,9 +114,10 @@ v128_t test_u32x4_load16x4(const void *mem) {
return wasm_u32x4_load16x4(mem);
}
-// CHECK-LABEL: @test_i64x2_load32x2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_load32x2(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -116,9 +126,10 @@ v128_t test_i64x2_load32x2(const void *mem) {
return wasm_i64x2_load32x2(mem);
}
-// CHECK-LABEL: @test_u64x2_load32x2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_load32x2(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -127,9 +138,10 @@ v128_t test_u64x2_load32x2(const void *mem) {
return wasm_u64x2_load32x2(mem);
}
-// CHECK-LABEL: @test_v128_load32_zero(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_zero(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i64 0
// CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
//
@@ -137,9 +149,10 @@ v128_t test_v128_load32_zero(const void *mem) {
return wasm_v128_load32_zero(mem);
}
-// CHECK-LABEL: @test_v128_load64_zero(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_zero(
+// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -148,10 +161,11 @@ v128_t test_v128_load64_zero(const void *mem) {
return wasm_v128_load64_zero(mem);
}
-// CHECK-LABEL: @test_v128_load8_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_lane(
+// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i64 15
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -160,10 +174,11 @@ v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) {
return wasm_v128_load8_lane(ptr, vec, 15);
}
-// CHECK-LABEL: @test_v128_load16_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_lane(
+// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i64 7
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -172,20 +187,22 @@ v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) {
return wasm_v128_load16_lane(ptr, vec, 7);
}
-// CHECK-LABEL: @test_v128_load32_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i64 3
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_lane(
+// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[TMP0]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) {
return wasm_v128_load32_lane(ptr, vec, 3);
}
-// CHECK-LABEL: @test_v128_load64_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_lane(
+// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i64 1
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -194,76 +211,82 @@ v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) {
return wasm_v128_load64_lane(ptr, vec, 1);
}
-// CHECK-LABEL: @test_v128_store(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden void @test_v128_store(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 16)) [[MEM:%.*]], <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
void test_v128_store(void *mem, v128_t a) {
wasm_v128_store(mem, a);
}
-// CHECK-LABEL: @test_v128_store8_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden void @test_v128_store8_lane(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 1)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
-// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
void test_v128_store8_lane(uint8_t *ptr, v128_t vec) {
wasm_v128_store8_lane(ptr, vec, 15);
}
-// CHECK-LABEL: @test_v128_store16_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden void @test_v128_store16_lane(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 2)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
-// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
void test_v128_store16_lane(uint16_t *ptr, v128_t vec) {
wasm_v128_store16_lane(ptr, vec, 7);
}
-// CHECK-LABEL: @test_v128_store32_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i64 3
-// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-LABEL: define hidden void @test_v128_store32_lane(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 4)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC]], i64 3
+// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
void test_v128_store32_lane(uint32_t *ptr, v128_t vec) {
wasm_v128_store32_lane(ptr, vec, 3);
}
-// CHECK-LABEL: @test_v128_store64_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden void @test_v128_store64_lane(
+// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 8)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
-// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]]
// CHECK-NEXT: ret void
//
void test_v128_store64_lane(uint64_t *ptr, v128_t vec) {
wasm_v128_store64_lane(ptr, vec, 1);
}
-// CHECK-LABEL: @test_i8x16_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
-// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
-// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
-// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
-// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
-// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
-// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
-// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
-// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
-// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
-// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
-// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
-// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_make(
+// CHECK-SAME: i8 noundef signext [[C0:%.*]], i8 noundef signext [[C1:%.*]], i8 noundef signext [[C2:%.*]], i8 noundef signext [[C3:%.*]], i8 noundef signext [[C4:%.*]], i8 noundef signext [[C5:%.*]], i8 noundef signext [[C6:%.*]], i8 noundef signext [[C7:%.*]], i8 noundef signext [[C8:%.*]], i8 noundef signext [[C9:%.*]], i8 noundef signext [[C10:%.*]], i8 noundef signext [[C11:%.*]], i8 noundef signext [[C12:%.*]], i8 noundef signext [[C13:%.*]], i8 noundef signext [[C14:%.*]], i8 noundef signext [[C15:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3]], i64 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4]], i64 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5]], i64 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6]], i64 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7]], i64 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8]], i64 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9]], i64 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10]], i64 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11]], i64 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12]], i64 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13]], i64 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14]], i64 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15]], i64 15
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -271,24 +294,25 @@ v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, in
return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
}
-// CHECK-LABEL: @test_u8x16_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
-// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
-// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
-// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
-// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
-// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
-// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
-// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
-// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
-// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
-// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
-// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
-// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
+// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_make(
+// CHECK-SAME: i8 noundef zeroext [[C0:%.*]], i8 noundef zeroext [[C1:%.*]], i8 noundef zeroext [[C2:%.*]], i8 noundef zeroext [[C3:%.*]], i8 noundef zeroext [[C4:%.*]], i8 noundef zeroext [[C5:%.*]], i8 noundef zeroext [[C6:%.*]], i8 noundef zeroext [[C7:%.*]], i8 noundef zeroext [[C8:%.*]], i8 noundef zeroext [[C9:%.*]], i8 noundef zeroext [[C10:%.*]], i8 noundef zeroext [[C11:%.*]], i8 noundef zeroext [[C12:%.*]], i8 noundef zeroext [[C13:%.*]], i8 noundef zeroext [[C14:%.*]], i8 noundef zeroext [[C15:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3]], i64 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4]], i64 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5]], i64 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6]], i64 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7]], i64 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8]], i64 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9]], i64 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10]], i64 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11]], i64 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12]], i64 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13]], i64 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14]], i64 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15]], i64 15
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -296,16 +320,17 @@ v128_t test_u8x16_make(uint8_t c0, uint8_t c1, uint8_t c2, uint8_t c3, uint8_t c
return wasm_u8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
}
-// CHECK-LABEL: @test_i16x8_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
-// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
-// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
-// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
-// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_make(
+// CHECK-SAME: i16 noundef signext [[C0:%.*]], i16 noundef signext [[C1:%.*]], i16 noundef signext [[C2:%.*]], i16 noundef signext [[C3:%.*]], i16 noundef signext [[C4:%.*]], i16 noundef signext [[C5:%.*]], i16 noundef signext [[C6:%.*]], i16 noundef signext [[C7:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3]], i64 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4]], i64 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5]], i64 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6]], i64 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7]], i64 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -313,16 +338,17 @@ v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c
return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
}
-// CHECK-LABEL: @test_u16x8_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
-// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
-// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
-// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
-// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_make(
+// CHECK-SAME: i16 noundef zeroext [[C0:%.*]], i16 noundef zeroext [[C1:%.*]], i16 noundef zeroext [[C2:%.*]], i16 noundef zeroext [[C3:%.*]], i16 noundef zeroext [[C4:%.*]], i16 noundef zeroext [[C5:%.*]], i16 noundef zeroext [[C6:%.*]], i16 noundef zeroext [[C7:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3]], i64 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4]], i64 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5]], i64 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6]], i64 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7]], i64 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -330,34 +356,37 @@ v128_t test_u16x8_make(uint16_t c0, uint16_t c1, uint16_t c2, uint16_t c3, uint1
return wasm_u16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
}
-// CHECK-LABEL: @test_i32x4_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_make(
+// CHECK-SAME: i32 noundef [[C0:%.*]], i32 noundef [[C1:%.*]], i32 noundef [[C2:%.*]], i32 noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
return wasm_i32x4_make(c0, c1, c2, c3);
}
-// CHECK-LABEL: @test_u32x4_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
+// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_make(
+// CHECK-SAME: i32 noundef [[C0:%.*]], i32 noundef [[C1:%.*]], i32 noundef [[C2:%.*]], i32 noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_u32x4_make(uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3) {
return wasm_u32x4_make(c0, c1, c2, c3);
}
-// CHECK-LABEL: @test_i64x2_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_make(
+// CHECK-SAME: i64 noundef [[C0:%.*]], i64 noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -365,10 +394,11 @@ v128_t test_i64x2_make(int64_t c0, int64_t c1) {
return wasm_i64x2_make(c0, c1);
}
-// CHECK-LABEL: @test_u64x2_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_make(
+// CHECK-SAME: i64 noundef [[C0:%.*]], i64 noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -376,12 +406,13 @@ v128_t test_u64x2_make(uint64_t c0, uint64_t c1) {
return wasm_u64x2_make(c0, c1);
}
-// CHECK-LABEL: @test_f32x4_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i64 1
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i64 2
-// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i64 3
+// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_make(
+// CHECK-SAME: float noundef [[C0:%.*]], float noundef [[C1:%.*]], float noundef [[C2:%.*]], float noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1]], i64 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2]], i64 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3]], i64 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -389,10 +420,11 @@ v128_t test_f32x4_make(float c0, float c1, float c2, float c3) {
return wasm_f32x4_make(c0, c1, c2, c3);
}
-// CHECK-LABEL: @test_f64x2_make(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C0:%.*]], i64 0
-// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i64 1
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_make(
+// CHECK-SAME: double noundef [[C0:%.*]], double noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C0]], i64 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -400,169 +432,190 @@ v128_t test_f64x2_make(double c0, double c1) {
return wasm_f64x2_make(c0, c1);
}
-// CHECK-LABEL: @test_i8x16_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 50462976, 252579085) <4 x i32> @test_i8x16_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t test_i8x16_const(void) {
return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
-// CHECK-LABEL: @test_u8x16_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 50462976, 252579085) <4 x i32> @test_u8x16_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t test_u8x16_const(void) {
return wasm_u8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
-// CHECK-LABEL: @test_i16x8_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 65536, 458759) <4 x i32> @test_i16x8_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t test_i16x8_const(void) {
return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
}
-// CHECK-LABEL: @test_u16x8_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 65536, 458759) <4 x i32> @test_u16x8_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t test_u16x8_const(void) {
return wasm_u16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
}
-// CHECK-LABEL: @test_i32x4_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 4) <4 x i32> @test_i32x4_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t test_i32x4_const(void) {
return wasm_i32x4_const(0, 1, 2, 3);
}
-// CHECK-LABEL: @test_u32x4_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 4) <4 x i32> @test_u32x4_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t test_u32x4_const(void) {
return wasm_u32x4_const(0, 1, 2, 3);
}
-// CHECK-LABEL: @test_i64x2_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 2) <4 x i32> @test_i64x2_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t test_i64x2_const(void) {
return wasm_i64x2_const(0, 1);
}
-// CHECK-LABEL: @test_u64x2_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 2) <4 x i32> @test_u64x2_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t test_u64x2_const(void) {
return wasm_u64x2_const(0, 1);
}
-// CHECK-LABEL: @test_f32x4_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 1077936129) <4 x i32> @test_f32x4_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
//
v128_t test_f32x4_const(void) {
return wasm_f32x4_const(0, 1, 2, 3);
}
-// CHECK-LABEL: @test_f64x2_const(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 1072693249) <4 x i32> @test_f64x2_const(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
//
v128_t test_f64x2_const(void) {
return wasm_f64x2_const(0, 1);
}
-// CHECK-LABEL: @test_i8x16_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 707406378)
//
v128_t test_i8x16_const_splat(void) {
return wasm_i8x16_const_splat(42);
}
-// CHECK-LABEL: @test_u8x16_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 707406378)
//
v128_t test_u8x16_const_splat(void) {
return wasm_u8x16_const_splat(42);
}
-// CHECK-LABEL: @test_i16x8_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 2752554)
//
v128_t test_i16x8_const_splat(void) {
return wasm_i16x8_const_splat(42);
}
-// CHECK-LABEL: @test_u16x8_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 2752554)
//
v128_t test_u16x8_const_splat(void) {
return wasm_u16x8_const_splat(42);
}
-// CHECK-LABEL: @test_i32x4_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 42)
//
v128_t test_i32x4_const_splat(void) {
return wasm_i32x4_const_splat(42);
}
-// CHECK-LABEL: @test_u32x4_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 42)
//
v128_t test_u32x4_const_splat(void) {
return wasm_u32x4_const_splat(42);
}
-// CHECK-LABEL: @test_i64x2_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 43) <4 x i32> @test_i64x2_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t test_i64x2_const_splat(void) {
return wasm_i64x2_const_splat(42);
}
-// CHECK-LABEL: @test_u64x2_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 43) <4 x i32> @test_u64x2_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t test_u64x2_const_splat(void) {
return wasm_u64x2_const_splat(42);
}
-// CHECK-LABEL: @test_f32x4_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> splat (i32 1109917696)
//
v128_t test_f32x4_const_splat(void) {
return wasm_f32x4_const_splat(42);
}
-// CHECK-LABEL: @test_f64x2_const_splat(
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define hidden noundef range(i32 0, 1078263809) <4 x i32> @test_f64x2_const_splat(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1078263808, i32 0, i32 1078263808>
//
v128_t test_f64x2_const_splat(void) {
return wasm_f64x2_const_splat(42);
}
-// CHECK-LABEL: @test_i8x16_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_splat(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
// CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -571,9 +624,10 @@ v128_t test_i8x16_splat(int8_t a) {
return wasm_i8x16_splat(a);
}
-// CHECK-LABEL: @test_u8x16_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_splat(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
// CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -582,9 +636,10 @@ v128_t test_u8x16_splat(uint8_t a) {
return wasm_u8x16_splat(a);
}
-// CHECK-LABEL: @test_i8x16_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef signext i8 @test_i8x16_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT: ret i8 [[VECEXT_I]]
//
@@ -592,9 +647,10 @@ int8_t test_i8x16_extract_lane(v128_t a) {
return wasm_i8x16_extract_lane(a, 15);
}
-// CHECK-LABEL: @test_u8x16_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef zeroext i8 @test_u8x16_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT: ret i8 [[VECEXT_I]]
//
@@ -602,10 +658,11 @@ uint8_t test_u8x16_extract_lane(v128_t a) {
return wasm_u8x16_extract_lane(a, 15);
}
-// CHECK-LABEL: @test_i8x16_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B]], i64 15
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -613,10 +670,11 @@ v128_t test_i8x16_replace_lane(v128_t a, int8_t b) {
return wasm_i8x16_replace_lane(a, 15, b);
}
-// CHECK-LABEL: @test_u8x16_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B]], i64 15
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -624,9 +682,10 @@ v128_t test_u8x16_replace_lane(v128_t a, uint8_t b) {
return wasm_u8x16_replace_lane(a, 15, b);
}
-// CHECK-LABEL: @test_i16x8_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_splat(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
// CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -635,9 +694,10 @@ v128_t test_i16x8_splat(int16_t a) {
return wasm_i16x8_splat(a);
}
-// CHECK-LABEL: @test_u16x8_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_splat(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
// CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -646,9 +706,10 @@ v128_t test_u16x8_splat(uint16_t a) {
return wasm_u16x8_splat(a);
}
-// CHECK-LABEL: @test_i16x8_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef signext i16 @test_i16x8_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT: ret i16 [[VECEXT_I]]
//
@@ -656,9 +717,10 @@ int16_t test_i16x8_extract_lane(v128_t a) {
return wasm_i16x8_extract_lane(a, 7);
}
-// CHECK-LABEL: @test_u16x8_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef zeroext i16 @test_u16x8_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT: ret i16 [[VECEXT_I]]
//
@@ -666,10 +728,11 @@ uint16_t test_u16x8_extract_lane(v128_t a) {
return wasm_u16x8_extract_lane(a, 7);
}
-// CHECK-LABEL: @test_i16x8_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B]], i64 7
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -677,10 +740,11 @@ v128_t test_i16x8_replace_lane(v128_t a, int16_t b) {
return wasm_i16x8_replace_lane(a, 7, b);
}
-// CHECK-LABEL: @test_u16x8_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B]], i64 7
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -688,9 +752,10 @@ v128_t test_u16x8_replace_lane(v128_t a, uint16_t b) {
return wasm_u16x8_replace_lane(a, 7, b);
}
-// CHECK-LABEL: @test_i32x4_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_splat(
+// CHECK-SAME: i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
// CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
@@ -698,9 +763,10 @@ v128_t test_i32x4_splat(int32_t a) {
return wasm_i32x4_splat(a);
}
-// CHECK-LABEL: @test_u32x4_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_splat(
+// CHECK-SAME: i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
// CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
@@ -708,45 +774,50 @@ v128_t test_u32x4_splat(uint32_t a) {
return wasm_u32x4_splat(a);
}
-// CHECK-LABEL: @test_i32x4_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
+// CHECK-LABEL: define hidden noundef i32 @test_i32x4_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A]], i64 3
// CHECK-NEXT: ret i32 [[VECEXT_I]]
//
int32_t test_i32x4_extract_lane(v128_t a) {
return wasm_i32x4_extract_lane(a, 3);
}
-// CHECK-LABEL: @test_u32x4_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
+// CHECK-LABEL: define hidden noundef i32 @test_u32x4_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A]], i64 3
// CHECK-NEXT: ret i32 [[VECEXT_I]]
//
uint32_t test_u32x4_extract_lane(v128_t a) {
return wasm_u32x4_extract_lane(a, 3);
}
-// CHECK-LABEL: @test_i32x4_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A]], i32 [[B]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t test_i32x4_replace_lane(v128_t a, int32_t b) {
return wasm_i32x4_replace_lane(a, 3, b);
}
-// CHECK-LABEL: @test_u32x4_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A]], i32 [[B]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t test_u32x4_replace_lane(v128_t a, uint32_t b) {
return wasm_u32x4_replace_lane(a, 3, b);
}
-// CHECK-LABEL: @test_i64x2_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_splat(
+// CHECK-SAME: i64 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -755,9 +826,10 @@ v128_t test_i64x2_splat(int64_t a) {
return wasm_i64x2_splat(a);
}
-// CHECK-LABEL: @test_u64x2_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_splat(
+// CHECK-SAME: i64 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -766,9 +838,10 @@ v128_t test_u64x2_splat(uint64_t a) {
return wasm_u64x2_splat(a);
}
-// CHECK-LABEL: @test_i64x2_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef i64 @test_i64x2_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT: ret i64 [[VECEXT_I]]
//
@@ -776,9 +849,10 @@ int64_t test_i64x2_extract_lane(v128_t a) {
return wasm_i64x2_extract_lane(a, 1);
}
-// CHECK-LABEL: @test_u64x2_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef i64 @test_u64x2_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT: ret i64 [[VECEXT_I]]
//
@@ -786,10 +860,11 @@ uint64_t test_u64x2_extract_lane(v128_t a) {
return wasm_u64x2_extract_lane(a, 1);
}
-// CHECK-LABEL: @test_i64x2_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -797,10 +872,11 @@ v128_t test_i64x2_replace_lane(v128_t a, int64_t b) {
return wasm_i64x2_replace_lane(a, 1, b);
}
-// CHECK-LABEL: @test_u64x2_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u64x2_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -808,9 +884,10 @@ v128_t test_u64x2_replace_lane(v128_t a, uint64_t b) {
return wasm_u64x2_replace_lane(a, 1, b);
}
-// CHECK-LABEL: @test_f32x4_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_splat(
+// CHECK-SAME: float noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -819,9 +896,10 @@ v128_t test_f32x4_splat(float a) {
return wasm_f32x4_splat(a);
}
-// CHECK-LABEL: @test_f32x4_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef float @test_f32x4_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT: ret float [[VECEXT_I]]
//
@@ -829,10 +907,11 @@ float test_f32x4_extract_lane(v128_t a) {
return wasm_f32x4_extract_lane(a, 3);
}
-// CHECK-LABEL: @test_f32x4_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i64 3
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i64 3
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -840,9 +919,10 @@ v128_t test_f32x4_replace_lane(v128_t a, float b) {
return wasm_f32x4_replace_lane(a, 3, b);
}
-// CHECK-LABEL: @test_f64x2_splat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_splat(
+// CHECK-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[A]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -851,9 +931,10 @@ v128_t test_f64x2_splat(double a) {
return wasm_f64x2_splat(a);
}
-// CHECK-LABEL: @test_f64x2_extract_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef double @test_f64x2_extract_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i64 1
// CHECK-NEXT: ret double [[VECEXT_I]]
//
@@ -861,10 +942,11 @@ double test_f64x2_extract_lane(v128_t a) {
return wasm_f64x2_extract_lane(a, 1);
}
-// CHECK-LABEL: @test_f64x2_replace_lane(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i64 1
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_replace_lane(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -872,10 +954,11 @@ v128_t test_f64x2_replace_lane(v128_t a, double b) {
return wasm_f64x2_replace_lane(a, 1, b);
}
-// CHECK-LABEL: @test_i8x16_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -885,10 +968,11 @@ v128_t test_i8x16_eq(v128_t a, v128_t b) {
return wasm_i8x16_eq(a, b);
}
-// CHECK-LABEL: @test_i8x16_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -898,10 +982,11 @@ v128_t test_i8x16_ne(v128_t a, v128_t b) {
return wasm_i8x16_ne(a, b);
}
-// CHECK-LABEL: @test_i8x16_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -911,10 +996,11 @@ v128_t test_i8x16_lt(v128_t a, v128_t b) {
return wasm_i8x16_lt(a, b);
}
-// CHECK-LABEL: @test_u8x16_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -924,10 +1010,11 @@ v128_t test_u8x16_lt(v128_t a, v128_t b) {
return wasm_u8x16_lt(a, b);
}
-// CHECK-LABEL: @test_i8x16_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -937,10 +1024,11 @@ v128_t test_i8x16_gt(v128_t a, v128_t b) {
return wasm_i8x16_gt(a, b);
}
-// CHECK-LABEL: @test_u8x16_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -950,10 +1038,11 @@ v128_t test_u8x16_gt(v128_t a, v128_t b) {
return wasm_u8x16_gt(a, b);
}
-// CHECK-LABEL: @test_i8x16_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -963,10 +1052,11 @@ v128_t test_i8x16_le(v128_t a, v128_t b) {
return wasm_i8x16_le(a, b);
}
-// CHECK-LABEL: @test_u8x16_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -976,10 +1066,11 @@ v128_t test_u8x16_le(v128_t a, v128_t b) {
return wasm_u8x16_le(a, b);
}
-// CHECK-LABEL: @test_i8x16_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -989,10 +1080,11 @@ v128_t test_i8x16_ge(v128_t a, v128_t b) {
return wasm_i8x16_ge(a, b);
}
-// CHECK-LABEL: @test_u8x16_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
@@ -1002,10 +1094,11 @@ v128_t test_u8x16_ge(v128_t a, v128_t b) {
return wasm_u8x16_ge(a, b);
}
-// CHECK-LABEL: @test_i16x8_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1015,10 +1108,11 @@ v128_t test_i16x8_eq(v128_t a, v128_t b) {
return wasm_i16x8_eq(a, b);
}
-// CHECK-LABEL: @test_i16x8_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1028,10 +1122,11 @@ v128_t test_i16x8_ne(v128_t a, v128_t b) {
return wasm_i16x8_ne(a, b);
}
-// CHECK-LABEL: @test_i16x8_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1041,10 +1136,11 @@ v128_t test_i16x8_lt(v128_t a, v128_t b) {
return wasm_i16x8_lt(a, b);
}
-// CHECK-LABEL: @test_u16x8_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1054,10 +1150,11 @@ v128_t test_u16x8_lt(v128_t a, v128_t b) {
return wasm_u16x8_lt(a, b);
}
-// CHECK-LABEL: @test_i16x8_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1067,10 +1164,11 @@ v128_t test_i16x8_gt(v128_t a, v128_t b) {
return wasm_i16x8_gt(a, b);
}
-// CHECK-LABEL: @test_u16x8_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1080,10 +1178,11 @@ v128_t test_u16x8_gt(v128_t a, v128_t b) {
return wasm_u16x8_gt(a, b);
}
-// CHECK-LABEL: @test_i16x8_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1093,10 +1192,11 @@ v128_t test_i16x8_le(v128_t a, v128_t b) {
return wasm_i16x8_le(a, b);
}
-// CHECK-LABEL: @test_u16x8_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1106,10 +1206,11 @@ v128_t test_u16x8_le(v128_t a, v128_t b) {
return wasm_u16x8_le(a, b);
}
-// CHECK-LABEL: @test_i16x8_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1119,10 +1220,11 @@ v128_t test_i16x8_ge(v128_t a, v128_t b) {
return wasm_i16x8_ge(a, b);
}
-// CHECK-LABEL: @test_u16x8_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
@@ -1132,9 +1234,10 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) {
return wasm_u16x8_ge(a, b);
}
-// CHECK-LABEL: @test_i32x4_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1142,9 +1245,10 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) {
return wasm_i32x4_eq(a, b);
}
-// CHECK-LABEL: @test_i32x4_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1152,9 +1256,10 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) {
return wasm_i32x4_ne(a, b);
}
-// CHECK-LABEL: @test_i32x4_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1162,9 +1267,10 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) {
return wasm_i32x4_lt(a, b);
}
-// CHECK-LABEL: @test_u32x4_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1172,9 +1278,10 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) {
return wasm_u32x4_lt(a, b);
}
-// CHECK-LABEL: @test_i32x4_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1182,9 +1289,10 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) {
return wasm_i32x4_gt(a, b);
}
-// CHECK-LABEL: @test_u32x4_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1192,9 +1300,10 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) {
return wasm_u32x4_gt(a, b);
}
-// CHECK-LABEL: @test_i32x4_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1202,9 +1311,10 @@ v128_t test_i32x4_le(v128_t a, v128_t b) {
return wasm_i32x4_le(a, b);
}
-// CHECK-LABEL: @test_u32x4_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1212,9 +1322,10 @@ v128_t test_u32x4_le(v128_t a, v128_t b) {
return wasm_u32x4_le(a, b);
}
-// CHECK-LABEL: @test_i32x4_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1222,9 +1333,10 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) {
return wasm_i32x4_ge(a, b);
}
-// CHECK-LABEL: @test_u32x4_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
@@ -1232,10 +1344,11 @@ v128_t test_u32x4_ge(v128_t a, v128_t b) {
return wasm_u32x4_ge(a, b);
}
-// CHECK-LABEL: @test_i64x2_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1245,10 +1358,11 @@ v128_t test_i64x2_eq(v128_t a, v128_t b) {
return wasm_i64x2_eq(a, b);
}
-// CHECK-LABEL: @test_i64x2_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1258,10 +1372,11 @@ v128_t test_i64x2_ne(v128_t a, v128_t b) {
return wasm_i64x2_ne(a, b);
}
-// CHECK-LABEL: @test_i64x2_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1271,10 +1386,11 @@ v128_t test_i64x2_lt(v128_t a, v128_t b) {
return wasm_i64x2_lt(a, b);
}
-// CHECK-LABEL: @test_i64x2_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1284,10 +1400,11 @@ v128_t test_i64x2_gt(v128_t a, v128_t b) {
return wasm_i64x2_gt(a, b);
}
-// CHECK-LABEL: @test_i64x2_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1297,10 +1414,11 @@ v128_t test_i64x2_le(v128_t a, v128_t b) {
return wasm_i64x2_le(a, b);
}
-// CHECK-LABEL: @test_i64x2_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1310,10 +1428,11 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) {
return wasm_i64x2_ge(a, b);
}
-// CHECK-LABEL: @test_f32x4_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1322,10 +1441,11 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) {
return wasm_f32x4_eq(a, b);
}
-// CHECK-LABEL: @test_f32x4_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1334,10 +1454,11 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) {
return wasm_f32x4_ne(a, b);
}
-// CHECK-LABEL: @test_f32x4_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1346,10 +1467,11 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) {
return wasm_f32x4_lt(a, b);
}
-// CHECK-LABEL: @test_f32x4_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1358,10 +1480,11 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) {
return wasm_f32x4_gt(a, b);
}
-// CHECK-LABEL: @test_f32x4_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1370,10 +1493,11 @@ v128_t test_f32x4_le(v128_t a, v128_t b) {
return wasm_f32x4_le(a, b);
}
-// CHECK-LABEL: @test_f32x4_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
@@ -1382,10 +1506,11 @@ v128_t test_f32x4_ge(v128_t a, v128_t b) {
return wasm_f32x4_ge(a, b);
}
-// CHECK-LABEL: @test_f64x2_eq(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_eq(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1395,10 +1520,11 @@ v128_t test_f64x2_eq(v128_t a, v128_t b) {
return wasm_f64x2_eq(a, b);
}
-// CHECK-LABEL: @test_f64x2_ne(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ne(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1408,10 +1534,11 @@ v128_t test_f64x2_ne(v128_t a, v128_t b) {
return wasm_f64x2_ne(a, b);
}
-// CHECK-LABEL: @test_f64x2_lt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_lt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1421,10 +1548,11 @@ v128_t test_f64x2_lt(v128_t a, v128_t b) {
return wasm_f64x2_lt(a, b);
}
-// CHECK-LABEL: @test_f64x2_gt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_gt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1434,10 +1562,11 @@ v128_t test_f64x2_gt(v128_t a, v128_t b) {
return wasm_f64x2_gt(a, b);
}
-// CHECK-LABEL: @test_f64x2_le(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_le(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1447,10 +1576,11 @@ v128_t test_f64x2_le(v128_t a, v128_t b) {
return wasm_f64x2_le(a, b);
}
-// CHECK-LABEL: @test_f64x2_ge(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ge(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
@@ -1460,55 +1590,61 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) {
return wasm_f64x2_ge(a, b);
}
-// CHECK-LABEL: @test_v128_not(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_not(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
// CHECK-NEXT: ret <4 x i32> [[NOT_I]]
//
v128_t test_v128_not(v128_t a) {
return wasm_v128_not(a);
}
-// CHECK-LABEL: @test_v128_and(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_and(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B]], [[A]]
// CHECK-NEXT: ret <4 x i32> [[AND_I]]
//
v128_t test_v128_and(v128_t a, v128_t b) {
return wasm_v128_and(a, b);
}
-// CHECK-LABEL: @test_v128_or(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_or(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B]], [[A]]
// CHECK-NEXT: ret <4 x i32> [[OR_I]]
//
v128_t test_v128_or(v128_t a, v128_t b) {
return wasm_v128_or(a, b);
}
-// CHECK-LABEL: @test_v128_xor(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_xor(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B]], [[A]]
// CHECK-NEXT: ret <4 x i32> [[XOR_I]]
//
v128_t test_v128_xor(v128_t a, v128_t b) {
return wasm_v128_xor(a, b);
}
-// CHECK-LABEL: @test_v128_andnot(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1)
-// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_andnot(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[NOT_I]]
// CHECK-NEXT: ret <4 x i32> [[AND_I]]
//
v128_t test_v128_andnot(v128_t a, v128_t b) {
return wasm_v128_andnot(a, b);
}
-// CHECK-LABEL: @test_v128_any_true(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden zeroext i1 @test_v128_any_true(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -1517,18 +1653,20 @@ bool test_v128_any_true(v128_t a) {
return wasm_v128_any_true(a);
}
-// CHECK-LABEL: @test_v128_bitselect(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]])
+// CHECK-LABEL: define hidden <4 x i32> @test_v128_bitselect(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> [[MASK]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
return wasm_v128_bitselect(a, b, mask);
}
-// CHECK-LABEL: @test_i8x16_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[ABS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -1537,9 +1675,10 @@ v128_t test_i8x16_abs(v128_t a) {
return wasm_i8x16_abs(a);
}
-// CHECK-LABEL: @test_i8x16_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -1548,9 +1687,10 @@ v128_t test_i8x16_neg(v128_t a) {
return wasm_i8x16_neg(a);
}
-// CHECK-LABEL: @test_i8x16_all_true(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden zeroext i1 @test_i8x16_all_true(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -1559,9 +1699,10 @@ bool test_i8x16_all_true(v128_t a) {
return wasm_i8x16_all_true(a);
}
-// CHECK-LABEL: @test_i8x16_bitmask(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden i32 @test_i8x16_bitmask(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -1569,21 +1710,23 @@ uint32_t test_i8x16_bitmask(v128_t a) {
return wasm_i8x16_bitmask(a);
}
-// CHECK-LABEL: @test_i8x16_popcnt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call range(i8 0, 9) <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_popcnt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[ELT_CTPOP_I:%.*]] = tail call range(i8 0, 9) <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[ELT_CTPOP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_i8x16_popcnt(v128_t a) {
return wasm_i8x16_popcnt(a);
}
-// CHECK-LABEL: @test_i8x16_shl(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shl(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
@@ -1595,10 +1738,11 @@ v128_t test_i8x16_shl(v128_t a, uint32_t b) {
return wasm_i8x16_shl(a, b);
}
-// CHECK-LABEL: @test_i8x16_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
@@ -1610,10 +1754,11 @@ v128_t test_i8x16_shr(v128_t a, uint32_t b) {
return wasm_i8x16_shr(a, b);
}
-// CHECK-LABEL: @test_u8x16_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
@@ -1625,10 +1770,11 @@ v128_t test_u8x16_shr(v128_t a, uint32_t b) {
return wasm_u8x16_shr(a, b);
}
-// CHECK-LABEL: @test_i8x16_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -1637,34 +1783,37 @@ v128_t test_i8x16_add(v128_t a, v128_t b) {
return wasm_i8x16_add(a, b);
}
-// CHECK-LABEL: @test_i8x16_add_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_add_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
return wasm_i8x16_add_sat(a, b);
}
-// CHECK-LABEL: @test_u8x16_add_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_add_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u8x16_add_sat(v128_t a, v128_t b) {
return wasm_u8x16_add_sat(a, b);
}
-// CHECK-LABEL: @test_i8x16_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -1673,82 +1822,89 @@ v128_t test_i8x16_sub(v128_t a, v128_t b) {
return wasm_i8x16_sub(a, b);
}
-// CHECK-LABEL: @test_i8x16_sub_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_sub_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
return wasm_i8x16_sub_sat(a, b);
}
-// CHECK-LABEL: @test_u8x16_sub_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_sub_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
return wasm_u8x16_sub_sat(a, b);
}
-// CHECK-LABEL: @test_i8x16_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MIN_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_min(v128_t a, v128_t b) {
return wasm_i8x16_min(a, b);
}
-// CHECK-LABEL: @test_u8x16_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MIN_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u8x16_min(v128_t a, v128_t b) {
return wasm_u8x16_min(a, b);
}
-// CHECK-LABEL: @test_i8x16_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MAX_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_max(v128_t a, v128_t b) {
return wasm_i8x16_max(a, b);
}
-// CHECK-LABEL: @test_u8x16_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MAX_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u8x16_max(v128_t a, v128_t b) {
return wasm_u8x16_max(a, b);
}
-// CHECK-LABEL: @test_u8x16_avgr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_avgr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -1757,9 +1913,10 @@ v128_t test_u8x16_avgr(v128_t a, v128_t b) {
return wasm_u8x16_avgr(a, b);
}
-// CHECK-LABEL: @test_i16x8_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[ABS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -1768,9 +1925,10 @@ v128_t test_i16x8_abs(v128_t a) {
return wasm_i16x8_abs(a);
}
-// CHECK-LABEL: @test_i16x8_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -1779,9 +1937,10 @@ v128_t test_i16x8_neg(v128_t a) {
return wasm_i16x8_neg(a);
}
-// CHECK-LABEL: @test_i16x8_all_true(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden zeroext i1 @test_i16x8_all_true(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -1790,9 +1949,10 @@ bool test_i16x8_all_true(v128_t a) {
return wasm_i16x8_all_true(a);
}
-// CHECK-LABEL: @test_i16x8_bitmask(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden i32 @test_i16x8_bitmask(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -1800,10 +1960,11 @@ uint32_t test_i16x8_bitmask(v128_t a) {
return wasm_i16x8_bitmask(a);
}
-// CHECK-LABEL: @test_i16x8_shl(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shl(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
@@ -1815,10 +1976,11 @@ v128_t test_i16x8_shl(v128_t a, uint32_t b) {
return wasm_i16x8_shl(a, b);
}
-// CHECK-LABEL: @test_i16x8_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
@@ -1830,10 +1992,11 @@ v128_t test_i16x8_shr(v128_t a, uint32_t b) {
return wasm_i16x8_shr(a, b);
}
-// CHECK-LABEL: @test_u16x8_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
@@ -1845,10 +2008,11 @@ v128_t test_u16x8_shr(v128_t a, uint32_t b) {
return wasm_u16x8_shr(a, b);
}
-// CHECK-LABEL: @test_i16x8_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -1857,34 +2021,37 @@ v128_t test_i16x8_add(v128_t a, v128_t b) {
return wasm_i16x8_add(a, b);
}
-// CHECK-LABEL: @test_i16x8_add_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_add_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
return wasm_i16x8_add_sat(a, b);
}
-// CHECK-LABEL: @test_u16x8_add_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_add_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_add_sat(v128_t a, v128_t b) {
return wasm_u16x8_add_sat(a, b);
}
-// CHECK-LABEL: @test_i16x8_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -1893,34 +2060,37 @@ v128_t test_i16x8_sub(v128_t a, v128_t b) {
return wasm_i16x8_sub(a, b);
}
-// CHECK-LABEL: @test_i16x8_sub_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_sub_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
return wasm_i16x8_sub_sat(a, b);
}
-// CHECK-LABEL: @test_u16x8_sub_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_sub_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_sub_sat(v128_t a, v128_t b) {
return wasm_u16x8_sub_sat(a, b);
}
-// CHECK-LABEL: @test_i16x8_mul(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_mul(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -1929,58 +2099,63 @@ v128_t test_i16x8_mul(v128_t a, v128_t b) {
return wasm_i16x8_mul(a, b);
}
-// CHECK-LABEL: @test_i16x8_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MIN_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_min(v128_t a, v128_t b) {
return wasm_i16x8_min(a, b);
}
-// CHECK-LABEL: @test_u16x8_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MIN_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_min(v128_t a, v128_t b) {
return wasm_u16x8_min(a, b);
}
-// CHECK-LABEL: @test_i16x8_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MAX_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_max(v128_t a, v128_t b) {
return wasm_i16x8_max(a, b);
}
-// CHECK-LABEL: @test_u16x8_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MAX_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_max(v128_t a, v128_t b) {
return wasm_u16x8_max(a, b);
}
-// CHECK-LABEL: @test_u16x8_avgr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_avgr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -1989,27 +2164,30 @@ v128_t test_u16x8_avgr(v128_t a, v128_t b) {
return wasm_u16x8_avgr(a, b);
}
-// CHECK-LABEL: @test_i32x4_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false)
+// CHECK-LABEL: define hidden noundef range(i32 0, -2147483647) <4 x i32> @test_i32x4_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A]], i1 false)
// CHECK-NEXT: ret <4 x i32> [[ABS_I]]
//
v128_t test_i32x4_abs(v128_t a) {
return wasm_i32x4_abs(a);
}
-// CHECK-LABEL: @test_i32x4_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
v128_t test_i32x4_neg(v128_t a) {
return wasm_i32x4_neg(a);
}
-// CHECK-LABEL: @test_i32x4_all_true(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]])
+// CHECK-LABEL: define hidden zeroext i1 @test_i32x4_all_true(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -2017,118 +2195,130 @@ bool test_i32x4_all_true(v128_t a) {
return wasm_i32x4_all_true(a);
}
-// CHECK-LABEL: @test_i32x4_bitmask(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]])
+// CHECK-LABEL: define hidden i32 @test_i32x4_bitmask(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A]])
// CHECK-NEXT: ret i32 [[TMP0]]
//
uint32_t test_i32x4_bitmask(v128_t a) {
return wasm_i32x4_bitmask(a);
}
-// CHECK-LABEL: @test_i32x4_shl(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shl(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHL_I]]
//
v128_t test_i32x4_shl(v128_t a, uint32_t b) {
return wasm_i32x4_shl(a, b);
}
-// CHECK-LABEL: @test_i32x4_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
//
v128_t test_i32x4_shr(v128_t a, uint32_t b) {
return wasm_i32x4_shr(a, b);
}
-// CHECK-LABEL: @test_u32x4_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
//
v128_t test_u32x4_shr(v128_t a, uint32_t b) {
return wasm_u32x4_shr(a, b);
}
-// CHECK-LABEL: @test_i32x4_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B]], [[A]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
v128_t test_i32x4_add(v128_t a, v128_t b) {
return wasm_i32x4_add(a, b);
}
-// CHECK-LABEL: @test_i32x4_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[B]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
v128_t test_i32x4_sub(v128_t a, v128_t b) {
return wasm_i32x4_sub(a, b);
}
-// CHECK-LABEL: @test_i32x4_mul(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_mul(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[A]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t test_i32x4_mul(v128_t a, v128_t b) {
return wasm_i32x4_mul(a, b);
}
-// CHECK-LABEL: @test_i32x4_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[ELT_MIN_I]]
//
v128_t test_i32x4_min(v128_t a, v128_t b) {
return wasm_i32x4_min(a, b);
}
-// CHECK-LABEL: @test_u32x4_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[ELT_MIN_I]]
//
v128_t test_u32x4_min(v128_t a, v128_t b) {
return wasm_u32x4_min(a, b);
}
-// CHECK-LABEL: @test_i32x4_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[ELT_MAX_I]]
//
v128_t test_i32x4_max(v128_t a, v128_t b) {
return wasm_i32x4_max(a, b);
}
-// CHECK-LABEL: @test_u32x4_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[ELT_MAX_I]]
//
v128_t test_u32x4_max(v128_t a, v128_t b) {
return wasm_u32x4_max(a, b);
}
-// CHECK-LABEL: @test_i32x4_dot_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_dot_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2136,9 +2326,10 @@ v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
return wasm_i32x4_dot_i16x8(a, b);
}
-// CHECK-LABEL: @test_i64x2_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[ABS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -2147,9 +2338,10 @@ v128_t test_i64x2_abs(v128_t a) {
return wasm_i64x2_abs(a);
}
-// CHECK-LABEL: @test_i64x2_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -2158,9 +2350,10 @@ v128_t test_i64x2_neg(v128_t a) {
return wasm_i64x2_neg(a);
}
-// CHECK-LABEL: @test_i64x2_all_true(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden zeroext i1 @test_i64x2_all_true(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -2169,9 +2362,10 @@ bool test_i64x2_all_true(v128_t a) {
return wasm_i64x2_all_true(a);
}
-// CHECK-LABEL: @test_i64x2_bitmask(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden i32 @test_i64x2_bitmask(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -2179,10 +2373,11 @@ uint32_t test_i64x2_bitmask(v128_t a) {
return wasm_i64x2_bitmask(a);
}
-// CHECK-LABEL: @test_i64x2_shl(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shl(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63
// CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
@@ -2194,10 +2389,11 @@ v128_t test_i64x2_shl(v128_t a, uint32_t b) {
return wasm_i64x2_shl(a, b);
}
-// CHECK-LABEL: @test_i64x2_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63
// CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
@@ -2209,10 +2405,11 @@ v128_t test_i64x2_shr(v128_t a, uint32_t b) {
return wasm_i64x2_shr(a, b);
}
-// CHECK-LABEL: @test_u64x2_shr(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_shr(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63
// CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
@@ -2224,10 +2421,11 @@ v128_t test_u64x2_shr(v128_t a, uint32_t b) {
return wasm_u64x2_shr(a, b);
}
-// CHECK-LABEL: @test_i64x2_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2236,10 +2434,11 @@ v128_t test_i64x2_add(v128_t a, v128_t b) {
return wasm_i64x2_add(a, b);
}
-// CHECK-LABEL: @test_i64x2_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2248,10 +2447,11 @@ v128_t test_i64x2_sub(v128_t a, v128_t b) {
return wasm_i64x2_sub(a, b);
}
-// CHECK-LABEL: @test_i64x2_mul(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_mul(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2260,9 +2460,10 @@ v128_t test_i64x2_mul(v128_t a, v128_t b) {
return wasm_i64x2_mul(a, b);
}
-// CHECK-LABEL: @test_f32x4_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2271,9 +2472,10 @@ v128_t test_f32x4_abs(v128_t a) {
return wasm_f32x4_abs(a);
}
-// CHECK-LABEL: @test_f32x4_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -2282,9 +2484,10 @@ v128_t test_f32x4_neg(v128_t a) {
return wasm_f32x4_neg(a);
}
-// CHECK-LABEL: @test_f32x4_sqrt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_sqrt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2293,9 +2496,10 @@ v128_t test_f32x4_sqrt(v128_t a) {
return wasm_f32x4_sqrt(a);
}
-// CHECK-LABEL: @test_f32x4_ceil(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_ceil(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2304,9 +2508,10 @@ v128_t test_f32x4_ceil(v128_t a) {
return wasm_f32x4_ceil(a);
}
-// CHECK-LABEL: @test_f32x4_floor(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_floor(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2315,9 +2520,10 @@ v128_t test_f32x4_floor(v128_t a) {
return wasm_f32x4_floor(a);
}
-// CHECK-LABEL: @test_f32x4_trunc(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_trunc(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2326,9 +2532,10 @@ v128_t test_f32x4_trunc(v128_t a) {
return wasm_f32x4_trunc(a);
}
-// CHECK-LABEL: @test_f32x4_nearest(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_nearest(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2337,10 +2544,11 @@ v128_t test_f32x4_nearest(v128_t a) {
return wasm_f32x4_nearest(a);
}
-// CHECK-LABEL: @test_f32x4_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2349,10 +2557,11 @@ v128_t test_f32x4_add(v128_t a, v128_t b) {
return wasm_f32x4_add(a, b);
}
-// CHECK-LABEL: @test_f32x4_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2361,10 +2570,11 @@ v128_t test_f32x4_sub(v128_t a, v128_t b) {
return wasm_f32x4_sub(a, b);
}
-// CHECK-LABEL: @test_f32x4_mul(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_mul(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2373,10 +2583,11 @@ v128_t test_f32x4_mul(v128_t a, v128_t b) {
return wasm_f32x4_mul(a, b);
}
-// CHECK-LABEL: @test_f32x4_div(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_div(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2385,10 +2596,11 @@ v128_t test_f32x4_div(v128_t a, v128_t b) {
return wasm_f32x4_div(a, b);
}
-// CHECK-LABEL: @test_f32x4_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2397,10 +2609,11 @@ v128_t test_f32x4_min(v128_t a, v128_t b) {
return wasm_f32x4_min(a, b);
}
-// CHECK-LABEL: @test_f32x4_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2409,10 +2622,11 @@ v128_t test_f32x4_max(v128_t a, v128_t b) {
return wasm_f32x4_max(a, b);
}
-// CHECK-LABEL: @test_f32x4_pmin(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_pmin(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2421,10 +2635,11 @@ v128_t test_f32x4_pmin(v128_t a, v128_t b) {
return wasm_f32x4_pmin(a, b);
}
-// CHECK-LABEL: @test_f32x4_pmax(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_pmax(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2433,9 +2648,10 @@ v128_t test_f32x4_pmax(v128_t a, v128_t b) {
return wasm_f32x4_pmax(a, b);
}
-// CHECK-LABEL: @test_f64x2_abs(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_abs(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2444,9 +2660,10 @@ v128_t test_f64x2_abs(v128_t a) {
return wasm_f64x2_abs(a);
}
-// CHECK-LABEL: @test_f64x2_neg(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_neg(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -2455,9 +2672,10 @@ v128_t test_f64x2_neg(v128_t a) {
return wasm_f64x2_neg(a);
}
-// CHECK-LABEL: @test_f64x2_sqrt(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_sqrt(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2466,9 +2684,10 @@ v128_t test_f64x2_sqrt(v128_t a) {
return wasm_f64x2_sqrt(a);
}
-// CHECK-LABEL: @test_f64x2_ceil(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ceil(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2477,9 +2696,10 @@ v128_t test_f64x2_ceil(v128_t a) {
return wasm_f64x2_ceil(a);
}
-// CHECK-LABEL: @test_f64x2_floor(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_floor(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2488,9 +2708,10 @@ v128_t test_f64x2_floor(v128_t a) {
return wasm_f64x2_floor(a);
}
-// CHECK-LABEL: @test_f64x2_trunc(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_trunc(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2499,9 +2720,10 @@ v128_t test_f64x2_trunc(v128_t a) {
return wasm_f64x2_trunc(a);
}
-// CHECK-LABEL: @test_f64x2_nearest(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_nearest(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2510,10 +2732,11 @@ v128_t test_f64x2_nearest(v128_t a) {
return wasm_f64x2_nearest(a);
}
-// CHECK-LABEL: @test_f64x2_add(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_add(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2522,10 +2745,11 @@ v128_t test_f64x2_add(v128_t a, v128_t b) {
return wasm_f64x2_add(a, b);
}
-// CHECK-LABEL: @test_f64x2_sub(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_sub(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2534,10 +2758,11 @@ v128_t test_f64x2_sub(v128_t a, v128_t b) {
return wasm_f64x2_sub(a, b);
}
-// CHECK-LABEL: @test_f64x2_mul(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_mul(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2546,10 +2771,11 @@ v128_t test_f64x2_mul(v128_t a, v128_t b) {
return wasm_f64x2_mul(a, b);
}
-// CHECK-LABEL: @test_f64x2_div(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_div(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2558,10 +2784,11 @@ v128_t test_f64x2_div(v128_t a, v128_t b) {
return wasm_f64x2_div(a, b);
}
-// CHECK-LABEL: @test_f64x2_min(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_min(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2570,10 +2797,11 @@ v128_t test_f64x2_min(v128_t a, v128_t b) {
return wasm_f64x2_min(a, b);
}
-// CHECK-LABEL: @test_f64x2_max(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_max(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2582,10 +2810,11 @@ v128_t test_f64x2_max(v128_t a, v128_t b) {
return wasm_f64x2_max(a, b);
}
-// CHECK-LABEL: @test_f64x2_pmin(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_pmin(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2594,10 +2823,11 @@ v128_t test_f64x2_pmin(v128_t a, v128_t b) {
return wasm_f64x2_pmin(a, b);
}
-// CHECK-LABEL: @test_f64x2_pmax(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_pmax(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2606,9 +2836,10 @@ v128_t test_f64x2_pmax(v128_t a, v128_t b) {
return wasm_f64x2_pmax(a, b);
}
-// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_trunc_sat_f32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2616,9 +2847,10 @@ v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
return wasm_i32x4_trunc_sat_f32x4(a);
}
-// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_trunc_sat_f32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2626,9 +2858,10 @@ v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
return wasm_u32x4_trunc_sat_f32x4(a);
}
-// CHECK-LABEL: @test_f32x4_convert_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_convert_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -2636,9 +2869,10 @@ v128_t test_f32x4_convert_i32x4(v128_t a) {
return wasm_f32x4_convert_i32x4(a);
}
-// CHECK-LABEL: @test_f32x4_convert_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_convert_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@@ -2646,9 +2880,10 @@ v128_t test_f32x4_convert_u32x4(v128_t a) {
return wasm_f32x4_convert_u32x4(a);
}
-// CHECK-LABEL: @test_f64x2_convert_low_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_convert_low_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2657,9 +2892,10 @@ v128_t test_f64x2_convert_low_i32x4(v128_t a) {
return wasm_f64x2_convert_low_i32x4(a);
}
-// CHECK-LABEL: @test_f64x2_convert_low_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_convert_low_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2668,9 +2904,10 @@ v128_t test_f64x2_convert_low_u32x4(v128_t a) {
return wasm_f64x2_convert_low_u32x4(a);
}
-// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_trunc_sat_f64x2_zero(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2679,9 +2916,10 @@ v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
return wasm_i32x4_trunc_sat_f64x2_zero(a);
}
-// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_trunc_sat_f64x2_zero(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2690,9 +2928,10 @@ v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
return wasm_u32x4_trunc_sat_f64x2_zero(a);
}
-// CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_demote_f64x2_zero(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double>
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
@@ -2702,9 +2941,10 @@ v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
return wasm_f32x4_demote_f64x2_zero(a);
}
-// CHECK-LABEL: @test_f64x2_promote_low_f32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_promote_low_f32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
@@ -2714,10 +2954,11 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
-// CHECK-LABEL: @test_i8x16_shuffle(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shuffle(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2726,10 +2967,11 @@ v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}
-// CHECK-LABEL: @test_i16x8_shuffle(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shuffle(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2738,10 +2980,11 @@ v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
}
-// CHECK-LABEL: @test_i32x4_shuffle(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shuffle(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2750,10 +2993,11 @@ v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
}
-// CHECK-LABEL: @test_i64x2_shuffle(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shuffle(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2762,10 +3006,11 @@ v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
return wasm_i64x2_shuffle(a, b, 1, 0);
}
-// CHECK-LABEL: @test_i8x16_swizzle(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_swizzle(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2774,10 +3019,11 @@ v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
return wasm_i8x16_swizzle(a, b);
}
-// CHECK-LABEL: @test_i8x16_narrow_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_narrow_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2786,10 +3032,11 @@ v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
return wasm_i8x16_narrow_i16x8(a, b);
}
-// CHECK-LABEL: @test_u8x16_narrow_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_narrow_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2798,9 +3045,10 @@ v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
return wasm_u8x16_narrow_i16x8(a, b);
}
-// CHECK-LABEL: @test_i16x8_narrow_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_narrow_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2808,9 +3056,10 @@ v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
return wasm_i16x8_narrow_i32x4(a, b);
}
-// CHECK-LABEL: @test_u16x8_narrow_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_narrow_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2818,9 +3067,10 @@ v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) {
return wasm_u16x8_narrow_i32x4(a, b);
}
-// CHECK-LABEL: @test_i16x8_extend_low_i8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extend_low_i8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
@@ -2830,9 +3080,10 @@ v128_t test_i16x8_extend_low_i8x16(v128_t a) {
return wasm_i16x8_extend_low_i8x16(a);
}
-// CHECK-LABEL: @test_i16x8_extend_high_i8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extend_high_i8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
@@ -2842,9 +3093,10 @@ v128_t test_i16x8_extend_high_i8x16(v128_t a) {
return wasm_i16x8_extend_high_i8x16(a);
}
-// CHECK-LABEL: @test_u16x8_extend_low_u8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extend_low_u8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
@@ -2854,9 +3106,10 @@ v128_t test_u16x8_extend_low_u8x16(v128_t a) {
return wasm_u16x8_extend_low_u8x16(a);
}
-// CHECK-LABEL: @test_u16x8_extend_high_u8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extend_high_u8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
@@ -2866,9 +3119,10 @@ v128_t test_u16x8_extend_high_u8x16(v128_t a) {
return wasm_u16x8_extend_high_u8x16(a);
}
-// CHECK-LABEL: @test_i32x4_extend_low_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_extend_low_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
@@ -2877,9 +3131,10 @@ v128_t test_i32x4_extend_low_i16x8(v128_t a) {
return wasm_i32x4_extend_low_i16x8(a);
}
-// CHECK-LABEL: @test_i32x4_extend_high_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_extend_high_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
@@ -2888,9 +3143,10 @@ v128_t test_i32x4_extend_high_i16x8(v128_t a) {
return wasm_i32x4_extend_high_i16x8(a);
}
-// CHECK-LABEL: @test_u32x4_extend_low_u16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_extend_low_u16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
@@ -2899,9 +3155,10 @@ v128_t test_u32x4_extend_low_u16x8(v128_t a) {
return wasm_u32x4_extend_low_u16x8(a);
}
-// CHECK-LABEL: @test_u32x4_extend_high_u16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_extend_high_u16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
@@ -2910,9 +3167,10 @@ v128_t test_u32x4_extend_high_u16x8(v128_t a) {
return wasm_u32x4_extend_high_u16x8(a);
}
-// CHECK-LABEL: @test_i64x2_extend_low_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extend_low_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2921,9 +3179,10 @@ v128_t test_i64x2_extend_low_i32x4(v128_t a) {
return wasm_i64x2_extend_low_i32x4(a);
}
-// CHECK-LABEL: @test_i64x2_extend_high_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extend_high_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2932,9 +3191,10 @@ v128_t test_i64x2_extend_high_i32x4(v128_t a) {
return wasm_i64x2_extend_high_i32x4(a);
}
-// CHECK-LABEL: @test_u64x2_extend_low_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extend_low_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2943,9 +3203,10 @@ v128_t test_u64x2_extend_low_u32x4(v128_t a) {
return wasm_u64x2_extend_low_u32x4(a);
}
-// CHECK-LABEL: @test_u64x2_extend_high_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extend_high_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -2954,9 +3215,10 @@ v128_t test_u64x2_extend_high_u32x4(v128_t a) {
return wasm_u64x2_extend_high_u32x4(a);
}
-// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extadd_pairwise_i8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2965,9 +3227,10 @@ v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
return wasm_i16x8_extadd_pairwise_i8x16(a);
}
-// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extadd_pairwise_u8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
@@ -2976,9 +3239,10 @@ v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
return wasm_u16x8_extadd_pairwise_u8x16(a);
}
-// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_extadd_pairwise_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2986,9 +3250,10 @@ v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
return wasm_i32x4_extadd_pairwise_i16x8(a);
}
-// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_extadd_pairwise_u16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2996,12 +3261,13 @@ v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
return wasm_u32x4_extadd_pairwise_u16x8(a);
}
-// CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extmul_low_i8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3012,12 +3278,13 @@ v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
return wasm_i16x8_extmul_low_i8x16(a, b);
}
-// CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extmul_high_i8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3028,12 +3295,13 @@ v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
return wasm_i16x8_extmul_high_i8x16(a, b);
}
-// CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extmul_low_u8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3044,12 +3312,13 @@ v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
return wasm_u16x8_extmul_low_u8x16(a, b);
}
-// CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extmul_high_u8x16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3060,12 +3329,13 @@ v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
return wasm_u16x8_extmul_high_u8x16(a, b);
}
-// CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 -1073709056, 1073741825) <4 x i32> @test_i32x4_extmul_low_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3075,12 +3345,13 @@ v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
return wasm_i32x4_extmul_low_i16x8(a, b);
}
-// CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 -1073709056, 1073741825) <4 x i32> @test_i32x4_extmul_high_i16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3090,12 +3361,13 @@ v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
return wasm_i32x4_extmul_high_i16x8(a, b);
}
-// CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 0, -131070) <4 x i32> @test_u32x4_extmul_low_u16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3105,12 +3377,13 @@ v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
return wasm_u32x4_extmul_low_u16x8(a, b);
}
-// CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden range(i32 0, -131070) <4 x i32> @test_u32x4_extmul_high_u16x8(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
@@ -3120,11 +3393,12 @@ v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
return wasm_u32x4_extmul_high_u16x8(a, b);
}
-// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extmul_low_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
@@ -3134,11 +3408,12 @@ v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
return wasm_i64x2_extmul_low_i32x4(a, b);
}
-// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extmul_high_i32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
@@ -3148,11 +3423,12 @@ v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
return wasm_i64x2_extmul_high_i32x4(a, b);
}
-// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extmul_low_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
@@ -3162,11 +3438,12 @@ v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
return wasm_u64x2_extmul_low_u32x4(a, b);
}
-// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extmul_high_u32x4(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
@@ -3176,10 +3453,11 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
return wasm_u64x2_extmul_high_u32x4(a, b);
}
-// CHECK-LABEL: @test_i16x8_q15mulr_sat(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_q15mulr_sat(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -3187,3 +3465,8 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
return wasm_i16x8_q15mulr_sat(a, b);
}
+//.
+// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index 8c1867d..ced5bca 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -49,6 +49,10 @@
#error "wrong value for __cpp_placeholder_variables"
#endif
+#if check(trivial_relocatability, 202502, 202502, 202502, 202502, 202502, 202502, 202502)
+#error "wrong value for __cpp_trivial_relocatability"
+#endif
+
// --- C++23 features ---
#if check(auto_cast, 0, 0, 0, 0, 0, 202110, 202110)
diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c
index bda4bd2..ef3e715 100644
--- a/clang/test/OpenMP/bug54082.c
+++ b/clang/test/OpenMP/bug54082.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6
// RUN: %clang_cc1 -fopenmp -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
typedef enum omp_allocator_handle_t {
@@ -63,47 +63,47 @@ void foo() {
(void)x;
}
}
-// CHECK-LABEL: define {{[^@]+}}@foo
-// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define void @foo(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_TRAITS:%.*]] = alloca [1 x %struct.omp_alloctrait_t], align 16
// CHECK-NEXT: [[X_ALLOC:%.*]] = alloca i64, align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X_TRAITS]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false)
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]]
// CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull [[X_TRAITS]]) #[[ATTR5]]
-// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]]
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB2:[0-9]+]], i32 1, ptr nonnull @foo.omp_outlined, ptr nonnull [[X_ALLOC]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_TRAITS]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
-// CHECK-LABEL: define {{[^@]+}}@foo.omp_outlined
-// CHECK-SAME: (ptr noalias noundef readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[X_ALLOC:%.*]]) #[[ATTR4:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define internal void @foo.omp_outlined(
+// CHECK-SAME: ptr noalias noundef readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[X_ALLOC:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA7:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_UB]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_STRIDE]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA7]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]]
// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr
// CHECK-NEXT: [[DOTX__VOID_ADDR:%.*]] = tail call ptr @__kmpc_alloc(i32 [[TMP0]], i64 8, ptr [[CONV]])
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr nonnull [[DOTOMP_IS_LAST]], ptr nonnull [[DOTOMP_LB]], ptr nonnull [[DOTOMP_UB]], ptr nonnull [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]]
// CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 1023)
-// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]]
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]]
// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr [[CONV5]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]]
@@ -112,3 +112,11 @@ void foo() {
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
+//.
+// CHECK: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+//.
diff --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c
index fad9e17..fa5e46d 100644
--- a/clang/test/OpenMP/bug56913.c
+++ b/clang/test/OpenMP/bug56913.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6
// RUN: %clang_cc1 -fopenmp-simd -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
int j;
@@ -12,21 +12,31 @@ void loop(int n) {
u = &j;
}
}
-// CHECK-LABEL: define {{[^@]+}}@loop
-// CHECK-SAME: (i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define void @loop(
+// CHECK-SAME: i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0
-// CHECK-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
-// CHECK: simd.if.then:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[SIMD_IF_THEN:.*]], label %[[SIMD_IF_END:.*]]
+// CHECK: [[SIMD_IF_THEN]]:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[INT_TBAA2:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[J]]) #[[ATTR2:[0-9]+]]
-// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]]
+// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP9:![0-9]+]]
// CHECK-NEXT: [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]]
-// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[INT_TBAA2]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[J]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[SIMD_IF_END]]
-// CHECK: simd.if.end:
+// CHECK-NEXT: br label %[[SIMD_IF_END]]
+// CHECK: [[SIMD_IF_END]]:
// CHECK-NEXT: ret void
//
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0}
+// CHECK: [[META8]] = !{!"any pointer", [[META4]], i64 0}
+// CHECK: [[ACC_GRP9]] = distinct !{}
+//.
diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp
index caf53a5b..5b61e14 100644
--- a/clang/test/OpenMP/bug57757.cpp
+++ b/clang/test/OpenMP/bug57757.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6
// RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
template <class Function, class... Args>
@@ -14,42 +14,42 @@ void foo() {
float b;
run_task(bar, a, b);
}
-// CHECK-LABEL: define {{[^@]+}}@_Z3foov
-// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define void @_Z3foov(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 0, i64 56, i64 1, ptr nonnull @.omp_task_entry.)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
-// CHECK-NEXT: store ptr @_Z3barif, ptr [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: store ptr @_Z3barif, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
-// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8, !tbaa [[TBAA12:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8, !tbaa [[INT_TBAA12:![0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// CHECK-NEXT: ret void
//
//
-// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK-SAME: (i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA16:![0-9]+]], !alias.scope [[META13]], !noalias [[META17:![0-9]+]]
-// CHECK-NEXT: switch i32 [[TMP3]], label [[DOTOMP_OUTLINED__EXIT:%.*]] [
-// CHECK-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]]
-// CHECK-NEXT: i32 1, label [[DOTUNTIED_NEXT__I:%.*]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA16:![0-9]+]], !alias.scope [[META13]], !noalias [[META17:![0-9]+]]
+// CHECK-NEXT: switch i32 [[TMP3]], [[DOTOMP_OUTLINED__EXIT:label %.*]] [
+// CHECK-NEXT: i32 0, [[DOTUNTIED_JMP__I:label %.*]]
+// CHECK-NEXT: i32 1, [[DOTUNTIED_NEXT__I:label %.*]]
// CHECK-NEXT: ]
-// CHECK: .untied.jmp..i:
-// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]]
+// CHECK: [[_UNTIED_JMP__I:.*:]]
+// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA16]], !alias.scope [[META13]], !noalias [[META17]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]]
-// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
-// CHECK: .untied.next..i:
+// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]]
+// CHECK: [[_UNTIED_NEXT__I:.*:]]
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
-// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias [[META13]]
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[TBAA16]], !noalias [[META13]]
-// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias [[META13]]
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA19:![0-9]+]], !noalias [[META13]]
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[INT_TBAA16]], !noalias [[META13]]
+// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[FLOAT_TBAA20:![0-9]+]], !noalias [[META13]]
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias [[META13]]
-// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
-// CHECK: .omp_outlined..exit:
+// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]]
+// CHECK: [[_OMP_OUTLINED__EXIT:.*:]]
// CHECK-NEXT: ret i32 0
//
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
index 20e344f..46c87eb 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 6
// RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
@@ -30,30 +30,30 @@ void test() {
complex_reduction<double>();
}
#endif
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16
-// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define weak_odr protected ptx_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16(
+// CHECK1-SAME: ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]]
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK1: user_code.entry:
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// CHECK1: [[USER_CODE_ENTRY]]:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10:![0-9]+]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
-// CHECK1: worker.exit:
+// CHECK1: [[WORKER_EXIT]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(
+// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
@@ -66,82 +66,82 @@ void test() {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12:![0-9]+]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
-// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]]
+// CHECK1: [[OMP_INNER_FOR_BODY]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19:![0-9]+]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14:![0-9]+]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[FLOAT_TBAA14]]
// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4
-// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4
-// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21:![0-9]+]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16:![0-9]+]]
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA21]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[ANYPTR_TBAA16]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[ANYPTR_TBAA16]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK1: [[OMP_BODY_CONTINUE]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK1: [[OMP_INNER_FOR_INC]]:
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// CHECK1: [[OMP_INNER_FOR_END]]:
+// CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK1: [[OMP_LOOP_EXIT]]:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
@@ -155,15 +155,15 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_
-// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIfEC1ERKfS2_(
+// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8
@@ -171,9 +171,9 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(
+// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8
@@ -197,155 +197,155 @@ void test() {
// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22:![0-9]+]], !align [[META23:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
+// CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]]
+// CHECK1: [[OMP_PRECOND_THEN]]:
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[FLOAT_TBAA14]]
// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
-// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
+// CHECK1: [[OMP_DISPATCH_COND]]:
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1
// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]]
-// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]]
-// CHECK1: omp.dispatch.cleanup:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]]
-// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]]
+// CHECK1: [[OMP_DISPATCH_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_END:.*]]
+// CHECK1: [[OMP_DISPATCH_BODY]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1
// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]]
-// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]]
+// CHECK1: [[OMP_INNER_FOR_BODY]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1
// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to float
-// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[FLOAT_TBAA14]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to float
-// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[FLOAT_TBAA14]]
// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]]
// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK1: [[OMP_BODY_CONTINUE]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK1: [[OMP_INNER_FOR_INC]]:
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1
-// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
-// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// CHECK1: [[OMP_INNER_FOR_END]]:
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
+// CHECK1: [[OMP_DISPATCH_INC]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]]
-// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
-// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]]
+// CHECK1: [[OMP_DISPATCH_END]]:
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1
-// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
-// CHECK1: .omp.reduction.then:
+// CHECK1-NEXT: br i1 [[TMP35]], [[DOTOMP_REDUCTION_THEN:label %.*]], [[DOTOMP_REDUCTION_DONE:label %.*]]
+// CHECK1: [[_OMP_REDUCTION_THEN:.*:]]
// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]]
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
-// CHECK1: .omp.reduction.done:
+// CHECK1-NEXT: br [[DOTOMP_REDUCTION_DONE]]
+// CHECK1: [[_OMP_REDUCTION_DONE:.*:]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
+// CHECK1-NEXT: br label %[[OMP_PRECOND_END]]
+// CHECK1: [[OMP_PRECOND_END]]:
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
@@ -353,32 +353,32 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E
-// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(
+// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
+// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]]
// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]]
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA27:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
-// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]]
// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26:![0-9]+]]
// CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
-// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]]
// CHECK1-NEXT: ret ptr [[THIS1]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func(
+// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
@@ -417,33 +417,33 @@ void test() {
// CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
// CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
-// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK1: then:
+// CHECK1-NEXT: br i1 [[TMP29]], label %[[THEN:.*]], label %[[ELSE:.*]]
+// CHECK1: [[THEN]]:
// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[IFCONT:%.*]]
-// CHECK1: else:
-// CHECK1-NEXT: br label [[IFCONT]]
-// CHECK1: ifcont:
+// CHECK1-NEXT: br label %[[IFCONT:.*]]
+// CHECK1: [[ELSE]]:
+// CHECK1-NEXT: br label %[[IFCONT]]
+// CHECK1: [[IFCONT]]:
// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
-// CHECK1: then4:
+// CHECK1-NEXT: br i1 [[TMP32]], label %[[THEN4:.*]], label %[[ELSE5:.*]]
+// CHECK1: [[THEN4]]:
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP36]], ptr align 8 [[TMP34]], i64 8, i1 false)
-// CHECK1-NEXT: br label [[IFCONT6:%.*]]
-// CHECK1: else5:
-// CHECK1-NEXT: br label [[IFCONT6]]
-// CHECK1: ifcont6:
+// CHECK1-NEXT: br label %[[IFCONT6:.*]]
+// CHECK1: [[ELSE5]]:
+// CHECK1-NEXT: br label %[[IFCONT6]]
+// CHECK1: [[IFCONT6]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @_omp_reduction_inter_warp_copy_func(
+// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
@@ -456,96 +456,96 @@ void test() {
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: br label [[PRECOND:%.*]]
-// CHECK1: precond:
+// CHECK1-NEXT: br label %[[PRECOND:.*]]
+// CHECK1: [[PRECOND]]:
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
-// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
-// CHECK1: body:
+// CHECK1-NEXT: br i1 [[TMP7]], label %[[BODY:.*]], label %[[EXIT:.*]]
+// CHECK1: [[BODY]]:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK1: then:
+// CHECK1-NEXT: br i1 [[WARP_MASTER]], label %[[THEN:.*]], label %[[ELSE:.*]]
+// CHECK1: [[THEN]]:
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
-// CHECK1-NEXT: br label [[IFCONT:%.*]]
-// CHECK1: else:
-// CHECK1-NEXT: br label [[IFCONT]]
-// CHECK1: ifcont:
+// CHECK1-NEXT: br label %[[IFCONT:.*]]
+// CHECK1: [[ELSE]]:
+// CHECK1-NEXT: br label %[[IFCONT]]
+// CHECK1: [[IFCONT]]:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
-// CHECK1: then3:
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label %[[THEN3:.*]], label %[[ELSE4:.*]]
+// CHECK1: [[THEN3]]:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
-// CHECK1-NEXT: br label [[IFCONT5:%.*]]
-// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT5]]
-// CHECK1: ifcont5:
+// CHECK1-NEXT: br label %[[IFCONT5:.*]]
+// CHECK1: [[ELSE4]]:
+// CHECK1-NEXT: br label %[[IFCONT5]]
+// CHECK1: [[IFCONT5]]:
// CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: br label [[PRECOND]]
-// CHECK1: exit:
+// CHECK1-NEXT: br label %[[PRECOND]]
+// CHECK1: [[EXIT]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper
-// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper(
+// CHECK1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[SHORT_TBAA27:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32:![0-9]+]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA34:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16
-// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define weak_odr protected ptx_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16(
+// CHECK1-SAME: ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK1: user_code.entry:
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// CHECK1: [[USER_CODE_ENTRY]]:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
-// CHECK1: worker.exit:
+// CHECK1: [[WORKER_EXIT]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(
+// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
@@ -558,82 +558,82 @@ void test() {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16)
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
-// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]]
+// CHECK1: [[OMP_INNER_FOR_BODY]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA36:![0-9]+]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29:![0-9]+]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[DOUBLE_TBAA29]]
// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4
-// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4
-// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16]]
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA21]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[ANYPTR_TBAA16]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[ANYPTR_TBAA16]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK1: [[OMP_BODY_CONTINUE]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK1: [[OMP_INNER_FOR_INC]]:
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// CHECK1: [[OMP_INNER_FOR_END]]:
+// CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK1: [[OMP_LOOP_EXIT]]:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
@@ -647,15 +647,15 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_
-// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIdEC1ERKdS2_(
+// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8
@@ -663,9 +663,9 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(
+// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8
@@ -689,155 +689,155 @@ void test() {
// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8
// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35:![0-9]+]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
+// CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]]
+// CHECK1: [[OMP_PRECOND_THEN]]:
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[DOUBLE_TBAA29]]
// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
-// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
+// CHECK1: [[OMP_DISPATCH_COND]]:
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1
// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]]
-// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]]
-// CHECK1: omp.dispatch.cleanup:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]]
-// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]]
+// CHECK1: [[OMP_DISPATCH_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_END:.*]]
+// CHECK1: [[OMP_DISPATCH_BODY]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1
// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]]
-// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]]
+// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]]
+// CHECK1: [[OMP_INNER_FOR_BODY]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1
// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double
-// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[DOUBLE_TBAA29]]
// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to double
-// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[DOUBLE_TBAA29]]
// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]]
// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK1: [[OMP_BODY_CONTINUE]]:
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK1: [[OMP_INNER_FOR_INC]]:
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1
-// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
-// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// CHECK1: [[OMP_INNER_FOR_END]]:
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
+// CHECK1: [[OMP_DISPATCH_INC]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]]
-// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
-// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]]
+// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]]
+// CHECK1: [[OMP_DISPATCH_END]]:
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1
-// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
-// CHECK1: .omp.reduction.then:
+// CHECK1-NEXT: br i1 [[TMP35]], [[DOTOMP_REDUCTION_THEN:label %.*]], [[DOTOMP_REDUCTION_DONE:label %.*]]
+// CHECK1: [[_OMP_REDUCTION_THEN:.*:]]
// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]]
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
-// CHECK1: .omp.reduction.done:
+// CHECK1-NEXT: br [[DOTOMP_REDUCTION_DONE]]
+// CHECK1: [[_OMP_REDUCTION_DONE:.*:]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
+// CHECK1-NEXT: br label %[[OMP_PRECOND_END]]
+// CHECK1: [[OMP_PRECOND_END]]:
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
@@ -845,32 +845,32 @@ void test() {
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E
-// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(
+// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
+// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35]]
// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]]
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]]
-// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35]]
// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38:![0-9]+]]
// CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]]
-// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38]]
// CHECK1-NEXT: ret ptr [[THIS1]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func1(
+// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*]]:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
@@ -889,17 +889,17 @@ void test() {
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.0", ptr [[TMP9]], i64 1
-// CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]]
-// CHECK1: .shuffle.pre_cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ]
-// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ]
+// CHECK1-NEXT: br [[DOTSHUFFLE_PRE_COND:label %.*]]
+// CHECK1: [[_SHUFFLE_PRE_COND:.*:]]
+// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], %[[ENTRY]] ], [ [[TMP23:%.*]], %[[DOTSHUFFLE_THEN:.*]] ]
+// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], %[[ENTRY]] ], [ [[TMP24:%.*]], %[[DOTSHUFFLE_THEN]] ]
// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64
// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64
// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7
-// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]]
-// CHECK1: .shuffle.then:
+// CHECK1-NEXT: br i1 [[TMP18]], label %[[DOTSHUFFLE_THEN]], [[DOTSHUFFLE_EXIT:label %.*]]
+// CHECK1: [[_SHUFFLE_THEN:.*:]]
// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
@@ -907,8 +907,8 @@ void test() {
// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1
// CHECK1-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1
-// CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]]
-// CHECK1: .shuffle.exit:
+// CHECK1-NEXT: br [[DOTSHUFFLE_PRE_COND]]
+// CHECK1: [[_SHUFFLE_EXIT:.*:]]
// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1
@@ -922,33 +922,33 @@ void test() {
// CHECK1-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]]
// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]]
// CHECK1-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]]
-// CHECK1-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK1: then:
+// CHECK1-NEXT: br i1 [[TMP36]], label %[[THEN:.*]], label %[[ELSE:.*]]
+// CHECK1: [[THEN]]:
// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
-// CHECK1-NEXT: br label [[IFCONT:%.*]]
-// CHECK1: else:
-// CHECK1-NEXT: br label [[IFCONT]]
-// CHECK1: ifcont:
+// CHECK1-NEXT: br label %[[IFCONT:.*]]
+// CHECK1: [[ELSE]]:
+// CHECK1-NEXT: br label %[[IFCONT]]
+// CHECK1: [[IFCONT]]:
// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
-// CHECK1: then4:
+// CHECK1-NEXT: br i1 [[TMP39]], label %[[THEN4:.*]], label %[[ELSE5:.*]]
+// CHECK1: [[THEN4]]:
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP43]], ptr align 8 [[TMP41]], i64 16, i1 false)
-// CHECK1-NEXT: br label [[IFCONT6:%.*]]
-// CHECK1: else5:
-// CHECK1-NEXT: br label [[IFCONT6]]
-// CHECK1: ifcont6:
+// CHECK1-NEXT: br label %[[IFCONT6:.*]]
+// CHECK1: [[ELSE5]]:
+// CHECK1-NEXT: br label %[[IFCONT6]]
+// CHECK1: [[IFCONT6]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @_omp_reduction_inter_warp_copy_func2(
+// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
@@ -961,154 +961,189 @@ void test() {
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: br label [[PRECOND:%.*]]
-// CHECK1: precond:
+// CHECK1-NEXT: br label %[[PRECOND:.*]]
+// CHECK1: [[PRECOND]]:
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 4
-// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
-// CHECK1: body:
+// CHECK1-NEXT: br i1 [[TMP7]], label %[[BODY:.*]], label %[[EXIT:.*]]
+// CHECK1: [[BODY]]:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK1: then:
+// CHECK1-NEXT: br i1 [[WARP_MASTER]], label %[[THEN:.*]], label %[[ELSE:.*]]
+// CHECK1: [[THEN]]:
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
-// CHECK1-NEXT: br label [[IFCONT:%.*]]
-// CHECK1: else:
-// CHECK1-NEXT: br label [[IFCONT]]
-// CHECK1: ifcont:
+// CHECK1-NEXT: br label %[[IFCONT:.*]]
+// CHECK1: [[ELSE]]:
+// CHECK1-NEXT: br label %[[IFCONT]]
+// CHECK1: [[IFCONT]]:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
-// CHECK1: then3:
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label %[[THEN3:.*]], label %[[ELSE4:.*]]
+// CHECK1: [[THEN3]]:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
-// CHECK1-NEXT: br label [[IFCONT5:%.*]]
-// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT5]]
-// CHECK1: ifcont5:
+// CHECK1-NEXT: br label %[[IFCONT5:.*]]
+// CHECK1: [[ELSE4]]:
+// CHECK1-NEXT: br label %[[IFCONT5]]
+// CHECK1: [[IFCONT5]]:
// CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: br label [[PRECOND]]
-// CHECK1: exit:
+// CHECK1-NEXT: br label %[[PRECOND]]
+// CHECK1: [[EXIT]]:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper
-// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8]] {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper(
+// CHECK1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30]]
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[SHORT_TBAA27]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]]
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA45:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_
-// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIfEC2ERKfS2_(
+// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa [[FLOAT_TBAA14]]
+// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[FLOAT_TBAA14]]
+// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]]
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv
-// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden float @_ZNKSt7complexIfE4realEv(
+// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv
-// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden float @_ZNKSt7complexIfE4imagEv(
+// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_
-// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIdEC2ERKdS2_(
+// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]], !nonnull [[META22]], !align [[META35]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[DOUBLE_TBAA29]]
+// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]], !nonnull [[META22]], !align [[META35]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[DOUBLE_TBAA29]]
+// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38]]
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv
-// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden double @_ZNKSt7complexIdE4realEv(
+// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]]
// CHECK1-NEXT: ret double [[TMP0]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv
-// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
-// CHECK1-NEXT: entry:
+// CHECK1-LABEL: define linkonce_odr hidden double @_ZNKSt7complexIdE4imagEv(
+// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
+// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]]
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38]]
// CHECK1-NEXT: ret double [[TMP0]]
//
+//.
+// CHECK1: [[ANYPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK1: [[META7]] = !{!"any pointer", [[META8:![0-9]+]], i64 0}
+// CHECK1: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0}
+// CHECK1: [[META9]] = !{!"Simple C++ TBAA"}
+// CHECK1: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK1: [[META11]] = !{!"int", [[META8]], i64 0}
+// CHECK1: [[INTPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+// CHECK1: [[META13]] = !{!"p1 int", [[META7]], i64 0}
+// CHECK1: [[FLOAT_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+// CHECK1: [[META15]] = !{!"float", [[META8]], i64 0}
+// CHECK1: [[ANYPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+// CHECK1: [[META17]] = !{!"any p2 pointer", [[META7]], i64 0}
+// CHECK1: [[_ZTSST7COMPLEXIFEPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
+// CHECK1: [[META19]] = !{!"p1 _ZTSSt7complexIfE", [[META7]], i64 0}
+// CHECK1: [[FLOATPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+// CHECK1: [[META21]] = !{!"p1 float", [[META7]], i64 0}
+// CHECK1: [[META22]] = !{}
+// CHECK1: [[META23]] = !{i64 4}
+// CHECK1: [[FLOAT_TBAA24]] = !{[[META25:![0-9]+]], [[META15]], i64 0}
+// CHECK1: [[META25]] = !{!"_ZTSSt7complexIfE", [[META15]], i64 0, [[META15]], i64 4}
+// CHECK1: [[FLOAT_TBAA26]] = !{[[META25]], [[META15]], i64 4}
+// CHECK1: [[SHORT_TBAA27]] = !{[[META28:![0-9]+]], [[META28]], i64 0}
+// CHECK1: [[META28]] = !{!"short", [[META8]], i64 0}
+// CHECK1: [[DOUBLE_TBAA29]] = !{[[META30:![0-9]+]], [[META30]], i64 0}
+// CHECK1: [[META30]] = !{!"double", [[META8]], i64 0}
+// CHECK1: [[_ZTSST7COMPLEXIDEPTR_TBAA31]] = !{[[META32:![0-9]+]], [[META32]], i64 0}
+// CHECK1: [[META32]] = !{!"p1 _ZTSSt7complexIdE", [[META7]], i64 0}
+// CHECK1: [[DOUBLEPTR_TBAA33]] = !{[[META34:![0-9]+]], [[META34]], i64 0}
+// CHECK1: [[META34]] = !{!"p1 double", [[META7]], i64 0}
+// CHECK1: [[META35]] = !{i64 8}
+// CHECK1: [[DOUBLE_TBAA36]] = !{[[META37:![0-9]+]], [[META30]], i64 0}
+// CHECK1: [[META37]] = !{!"_ZTSSt7complexIdE", [[META30]], i64 0, [[META30]], i64 8}
+// CHECK1: [[DOUBLE_TBAA38]] = !{[[META37]], [[META30]], i64 8}
+//.
diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp
index 1c6a562..268b390 100644
--- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp
+++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 6
// RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm -fno-inline %s -o - | FileCheck %s --check-prefix=CHECK-NOINLINE
// expected-no-diagnostics
@@ -15,14 +15,14 @@ void foo() {
#endif
// CHECK: Function Attrs: mustprogress nounwind
-// CHECK-LABEL: define {{[^@]+}}@_Z3foov
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define dso_local void @_Z3foov(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
// CHECK-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-NEXT: call void @_Z3foov.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
@@ -31,36 +31,36 @@ void foo() {
//
//
// CHECK: Function Attrs: noinline norecurse nounwind
-// CHECK-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define internal void @_Z3foov.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7:![0-9]+]]
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
// CHECK-NEXT: ret void
//
//
// CHECK: Function Attrs: alwaysinline norecurse nounwind
-// CHECK-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.1
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK-NEXT: entry:
+// CHECK-LABEL: define internal void @_Z3foov.omp_outlined.1(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]]
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
// CHECK-NEXT: ret void
//
//
// CHECK-NOINLINE: Function Attrs: mustprogress noinline nounwind
-// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov
-// CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NOINLINE-NEXT: entry:
+// CHECK-NOINLINE-LABEL: define dso_local void @_Z3foov(
+// CHECK-NOINLINE-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]]
// CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
// CHECK-NOINLINE-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-NOINLINE-NEXT: call void @_Z3foov.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
@@ -69,23 +69,40 @@ void foo() {
//
//
// CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind
-// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
-// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
-// CHECK-NOINLINE-NEXT: entry:
+// CHECK-NOINLINE-LABEL: define internal void @_Z3foov.omp_outlined(
+// CHECK-NOINLINE-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]]
// CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7:![0-9]+]]
+// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
// CHECK-NOINLINE-NEXT: ret void
//
//
// CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind
-// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.1
-// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK-NOINLINE-NEXT: entry:
+// CHECK-NOINLINE-LABEL: define internal void @_Z3foov.omp_outlined.1(
+// CHECK-NOINLINE-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]]
// CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]]
-// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
+// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]]
// CHECK-NOINLINE-NEXT: ret void
//
+//.
+// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[INTPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0}
+// CHECK: [[META9]] = !{!"any pointer", [[META5]], i64 0}
+//.
+// CHECK-NOINLINE: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK-NOINLINE: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+// CHECK-NOINLINE: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK-NOINLINE: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK-NOINLINE: [[INTPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK-NOINLINE: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0}
+// CHECK-NOINLINE: [[META9]] = !{!"any pointer", [[META5]], i64 0}
+//.
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
index 82dd07a..cddd31d 100644
--- a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6
// RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
// expected-no-diagnostics
#ifndef HEADER
@@ -34,31 +34,32 @@ struct S {
#endif
+
// CHECK-LABEL: define noundef i32 @main(
// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]])
-// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
+// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[CHARPTR_TBAA7:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr nonnull @.omp_task_entry..2)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]]
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[INT_TBAA11:![0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
// CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
// CHECK: [[OMP_IF_THEN]]:
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]]
// CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..4)
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 40
-// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[TBAA15:![0-9]+]]
+// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15:![0-9]+]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 48
-// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 56
-// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
@@ -71,32 +72,32 @@ struct S {
// CHECK-NEXT: br i1 [[DOTNOT22]], label %[[OMP_IF_END17:.*]], label %[[OMP_IF_THEN2:.*]]
// CHECK: [[OMP_IF_THEN2]]:
// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]]
+// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[CHARPTR_TBAA7]]
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]]
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA17:![0-9]+]]
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[CHARPTR_TBAA17:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 [[IDXPROM]]
-// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA19:![0-9]+]]
+// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[CHAR_TBAA19:![0-9]+]]
// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32
// CHECK-NEXT: [[SUB12:%.*]] = sub i32 [[CONV]], [[TMP14]]
// CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[SUB12]] to i64
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV15]], [[IDXPROM]]
// CHECK-NEXT: [[SUB16:%.*]] = add nsw i64 [[MUL]], -1
// CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6)
-// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA20:![0-9]+]]
-// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA23:![0-9]+]]
+// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
+// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[INTPTR_TBAA23:![0-9]+]]
// CHECK-NEXT: [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP19]], i64 8
-// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA25:![0-9]+]]
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[CHARPTR_TBAA25:![0-9]+]]
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]]
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK-NEXT: [[TMP21:%.*]] = sext i1 [[TOBOOL]] to i32
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 40
-// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 48
-// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 56
-// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP25]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP18]], i32 [[TMP21]], ptr nonnull [[TMP22]], ptr nonnull [[TMP23]], i64 1, i32 1, i32 2, i64 4, i32 1, ptr null) #[[ATTR1]]
@@ -111,11 +112,11 @@ struct S {
// CHECK-NEXT: call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP27:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..8)
// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 40
-// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 48
-// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 56
-// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP31]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP27]], i32 1, ptr nonnull [[TMP28]], ptr nonnull [[TMP29]], i64 1, i32 1, i32 0, i64 0, ptr null)
@@ -126,33 +127,160 @@ struct S {
// CHECK-NEXT: ret i32 0
//
//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.(
+// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28:![0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
+// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29:![0-9]+]]
+// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32
+// CHECK-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT]], 32
+// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK: [[OMP_INNER_FOR_COND_I]]:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP6]], %[[ENTRY]] ]
+// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV]]
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: br i1 [[CMP_NOT_I]], [[DOTOMP_OUTLINED__1_EXIT:label %.*]], label %[[OMP_INNER_FOR_COND_I]]
+// CHECK: [[_OMP_OUTLINED__1_EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..2(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[DOTNOT_I]], [[DOTOMP_OUTLINED__EXIT:label %.*]], label %[[OMP_IF_THEN_I:.*]]
+// CHECK: [[OMP_IF_THEN_I]]:
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA3]]
+// CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 33, i64 80, i64 1, ptr nonnull @.omp_task_entry.)
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 8, !tbaa [[CHAR_TBAA19]]
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 40
+// CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8, !tbaa [[LONG_TBAA15]]
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 48
+// CHECK-NEXT: store i64 9, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15]]
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 56
+// CHECK-NEXT: store i64 1, ptr [[TMP9]], align 8, !tbaa [[LONG_TBAA15]]
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 72
+// CHECK-NEXT: store i64 0, ptr [[TMP10]], align 8
+// CHECK-NEXT: tail call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]], i32 1, ptr nonnull [[TMP7]], ptr nonnull [[TMP8]], i64 1, i32 1, i32 0, i64 0, ptr null)
+// CHECK-NEXT: tail call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT: tail call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]]
+// CHECK: [[_OMP_OUTLINED__EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..4(
+// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
+// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29]]
+// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32
+// CHECK-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT]], 32
+// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK: [[OMP_INNER_FOR_COND_I]]:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP6]], %[[ENTRY]] ]
+// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV]]
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: br i1 [[CMP_NOT_I]], [[DOTOMP_OUTLINED__3_EXIT:label %.*]], label %[[OMP_INNER_FOR_COND_I]]
+// CHECK: [[_OMP_OUTLINED__3_EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..6(
+// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[ANYPTR_TBAA20]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !tbaa [[LONG_TBAA28]]
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8, !tbaa [[LONG_TBAA29]]
+// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[INTPTR_TBAA33:![0-9]+]], !alias.scope [[META30]], !nonnull [[META35:![0-9]+]], !align [[META36:![0-9]+]]
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[INT_TBAA3]], !noalias [[META30]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP8]], 0
+// CHECK-NEXT: br i1 [[CMP_I]], label %[[LAND_LHS_TRUE_I:.*]], [[DOTOMP_OUTLINED__5_EXIT:label %.*]]
+// CHECK: [[LAND_LHS_TRUE_I]]:
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[CHARPTR_TBAA37:![0-9]+]], !alias.scope [[META30]], !nonnull [[META35]], !align [[META38:![0-9]+]]
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8, !tbaa [[CHARPTR_TBAA7]], !noalias [[META30]]
+// CHECK-NEXT: [[IDXPROM_I:%.*]] = zext nneg i32 [[TMP8]] to i64
+// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP11]], i64 [[IDXPROM_I]]
+// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8, !tbaa [[CHARPTR_TBAA17]], !noalias [[META30]]
+// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[IDXPROM_I]]
+// CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX5_I]], align 1, !tbaa [[CHAR_TBAA19]], !noalias [[META30]]
+// CHECK-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP13]] to i32
+// CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP8]], [[CONV_I]]
+// CHECK-NEXT: br i1 [[CMP13_I]], label %[[OMP_INNER_FOR_COND_I:.*]], [[DOTOMP_OUTLINED__5_EXIT]]
+// CHECK: [[OMP_INNER_FOR_COND_I]]:
+// CHECK-NEXT: [[DOTOMP_IV_0_I:%.*]] = phi i64 [ [[ADD46_I:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP4]], %[[LAND_LHS_TRUE_I]] ]
+// CHECK-NEXT: [[CMP16_NOT_I:%.*]] = icmp ugt i64 [[DOTOMP_IV_0_I]], [[TMP6]]
+// CHECK-NEXT: [[ADD46_I]] = add nsw i64 [[DOTOMP_IV_0_I]], 1
+// CHECK-NEXT: br i1 [[CMP16_NOT_I]], [[DOTOMP_OUTLINED__5_EXIT]], label %[[OMP_INNER_FOR_COND_I]]
+// CHECK: [[_OMP_OUTLINED__5_EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..8(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
+// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29]]
+// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32
+// CHECK-NEXT: [[CONV1_I2:%.*]] = ashr exact i64 [[SEXT]], 32
+// CHECK-NEXT: [[CMP_NOT_I3:%.*]] = icmp ult i64 [[TMP5]], [[CONV1_I2]]
+// CHECK-NEXT: br i1 [[CMP_NOT_I3]], [[DOTOMP_OUTLINED__7_EXIT:label %.*]], label %[[OMP_INNER_FOR_BODY_I:.*]]
+// CHECK: [[OMP_INNER_FOR_BODY_I]]:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTCANCEL_CONTINUE_I:.*]] ], [ [[CONV1_I2]], %[[ENTRY]] ]
+// CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @__kmpc_cancel(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 4)
+// CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[TMP6]], 0
+// CHECK-NEXT: br i1 [[DOTNOT_I]], label %[[DOTCANCEL_CONTINUE_I]], [[DOTOMP_OUTLINED__7_EXIT]]
+// CHECK: [[_CANCEL_CONTINUE_I:.*:]]
+// CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @__kmpc_cancellationpoint(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 4)
+// CHECK-NEXT: [[DOTNOT12_I:%.*]] = icmp ne i32 [[TMP7]], 0
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV_NEXT]]
+// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[DOTNOT12_I]], i1 true, i1 [[CMP_NOT_I]]
+// CHECK-NEXT: br i1 [[OR_COND]], [[DOTOMP_OUTLINED__7_EXIT]], label %[[OMP_INNER_FOR_BODY_I]]
+// CHECK: [[_OMP_OUTLINED__7_EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
//
// CHECK-LABEL: define linkonce_odr void @_ZN1SC2Ei(
// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[C:%.*]]) unnamed_addr #[[ATTR6:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]])
-// CHECK-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[INT_TBAA3]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
// CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
// CHECK: [[OMP_IF_THEN]]:
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[TBAA35:![0-9]+]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[INT_TBAA39:![0-9]+]]
// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[INT_TBAA3]]
// CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[TMP3]], -1
// CHECK-NEXT: [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10)
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA20]]
-// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA37:![0-9]+]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA20]]
+// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[_ZTS1SPTR_TBAA41:![0-9]+]]
// CHECK-NEXT: [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 8
-// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA23]]
+// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[INTPTR_TBAA23]]
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40
-// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 48
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SUB4]] to i64
-// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 56
-// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
@@ -162,4 +290,85 @@ struct S {
// CHECK-NEXT: br label %[[OMP_IF_END]]
// CHECK: [[OMP_IF_END]]:
// CHECK-NEXT: ret void
-
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..10(
+// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR5]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[ANYPTR_TBAA20]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !tbaa [[LONG_TBAA28]]
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8, !tbaa [[LONG_TBAA29]]
+// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[INTPTR_TBAA46:![0-9]+]], !alias.scope [[META43]], !nonnull [[META35]], !align [[META36]]
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[INT_TBAA3]], !noalias [[META43]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP9]], 0
+// CHECK-NEXT: br i1 [[CMP_I]], label %[[TASKLOOP_IF_THEN_I:.*]], [[DOTOMP_OUTLINED__9_EXIT:label %.*]]
+// CHECK: [[TASKLOOP_IF_THEN_I]]:
+// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP4]], 32
+// CHECK-NEXT: [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 32
+// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK: [[OMP_INNER_FOR_COND_I]]:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP10]], %[[TASKLOOP_IF_THEN_I]] ]
+// CHECK-NEXT: [[CMP8_NOT_I:%.*]] = icmp ult i64 [[TMP6]], [[INDVARS_IV]]
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: br i1 [[CMP8_NOT_I]], [[DOTOMP_OUTLINED__9_EXIT]], label %[[OMP_INNER_FOR_COND_I]]
+// CHECK: [[_OMP_OUTLINED__9_EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define internal void @_GLOBAL__sub_I_taskloop_strictmodifier_codegen.cpp(
+// CHECK-SAME: ) #[[ATTR7:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @_ZN1SC2Ei(ptr noundef nonnull align 4 dereferenceable(4) @s, i32 noundef 1)
+// CHECK-NEXT: ret void
+//
+//.
+// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[CHARPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"p2 omnipotent char", [[META9:![0-9]+]], i64 0}
+// CHECK: [[META9]] = !{!"any p2 pointer", [[META10:![0-9]+]], i64 0}
+// CHECK: [[META10]] = !{!"any pointer", [[META5]], i64 0}
+// CHECK: [[INT_TBAA11]] = !{[[META12:![0-9]+]], [[META4]], i64 40}
+// CHECK: [[META12]] = !{!"_ZTS24kmp_task_t_with_privates", [[META13:![0-9]+]], i64 0, [[META14:![0-9]+]], i64 40}
+// CHECK: [[META13]] = !{!"_ZTS10kmp_task_t", [[META10]], i64 0, [[META10]], i64 8, [[META4]], i64 16, [[META5]], i64 24, [[META5]], i64 32}
+// CHECK: [[META14]] = !{!"_ZTS15.kmp_privates.t", [[META4]], i64 0}
+// CHECK: [[LONG_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0}
+// CHECK: [[META16]] = !{!"long", [[META5]], i64 0}
+// CHECK: [[CHARPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
+// CHECK: [[META18]] = !{!"p1 omnipotent char", [[META10]], i64 0}
+// CHECK: [[CHAR_TBAA19]] = !{[[META5]], [[META5]], i64 0}
+// CHECK: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META10]], i64 0}
+// CHECK: [[META21]] = !{!"_ZTS24kmp_task_t_with_privates", [[META22:![0-9]+]], i64 0}
+// CHECK: [[META22]] = !{!"_ZTS10kmp_task_t", [[META10]], i64 0, [[META10]], i64 8, [[META4]], i64 16, [[META5]], i64 24, [[META5]], i64 32, [[META16]], i64 40, [[META16]], i64 48, [[META16]], i64 56, [[META4]], i64 64, [[META10]], i64 72}
+// CHECK: [[INTPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0}
+// CHECK: [[META24]] = !{!"p1 int", [[META10]], i64 0}
+// CHECK: [[CHARPTR_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
+// CHECK: [[META26]] = !{!"p3 omnipotent char", [[META27:![0-9]+]], i64 0}
+// CHECK: [[META27]] = !{!"any p3 pointer", [[META9]], i64 0}
+// CHECK: [[LONG_TBAA28]] = !{[[META21]], [[META16]], i64 40}
+// CHECK: [[LONG_TBAA29]] = !{[[META21]], [[META16]], i64 48}
+// CHECK: [[META30]] = !{[[META31:![0-9]+]]}
+// CHECK: [[META31]] = distinct !{[[META31]], [[META32:![0-9]+]], !".omp_outlined..5: %__context"}
+// CHECK: [[META32]] = distinct !{[[META32]], !".omp_outlined..5"}
+// CHECK: [[INTPTR_TBAA33]] = !{[[META34:![0-9]+]], [[META24]], i64 0}
+// CHECK: [[META34]] = !{!"_ZTSZ4mainE3$_3", [[META24]], i64 0, [[META26]], i64 8}
+// CHECK: [[META35]] = !{}
+// CHECK: [[META36]] = !{i64 4}
+// CHECK: [[CHARPTR_TBAA37]] = !{[[META34]], [[META26]], i64 8}
+// CHECK: [[META38]] = !{i64 8}
+// CHECK: [[INT_TBAA39]] = !{[[META40:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META40]] = !{!"_ZTS1S", [[META4]], i64 0}
+// CHECK: [[_ZTS1SPTR_TBAA41]] = !{[[META42:![0-9]+]], [[META42]], i64 0}
+// CHECK: [[META42]] = !{!"p1 _ZTS1S", [[META10]], i64 0}
+// CHECK: [[META43]] = !{[[META44:![0-9]+]]}
+// CHECK: [[META44]] = distinct !{[[META44]], [[META45:![0-9]+]], !".omp_outlined..9: %__context"}
+// CHECK: [[META45]] = distinct !{[[META45]], !".omp_outlined..9"}
+// CHECK: [[INTPTR_TBAA46]] = !{[[META47:![0-9]+]], [[META24]], i64 8}
+// CHECK: [[META47]] = !{!"_ZTSZN1SC1EiEUt_", [[META42]], i64 0, [[META24]], i64 8}
+//.
diff --git a/clang/test/PCH/leakfiles.test b/clang/test/PCH/leakfiles.test
index dc4047ac..45dc36f 100644
--- a/clang/test/PCH/leakfiles.test
+++ b/clang/test/PCH/leakfiles.test
@@ -1,9 +1,8 @@
// Test that compiling using a PCH doesn't leak file descriptors.
// https://bugs.chromium.org/p/chromium/issues/detail?id=924225
//
-// This test requires bash loops and ulimit.
-// REQUIRES: shell
-// UNSUPPORTED: target={{.*win32.*}}
+// This test uses ulimit.
+// UNSUPPORTED: system-windows
//
// Set up source files. lib/lib.h includes lots of lib*.h files in that dir.
// client.c includes lib/lib.h, and also the individual files directly.
@@ -12,10 +11,10 @@
// RUN: mkdir %t
// RUN: cd %t
// RUN: mkdir lib
-// RUN: for i in {1..300}; do touch lib/lib$i.h; done
-// RUN: for i in {1..300}; do echo "#include \"lib$i.h\"" >> lib/lib.h; done
+// RUN: %python -c "from pathlib import Path; list(map(lambda i: Path(f'lib/lib{i}.h').touch(), range(1, 301)))"
+// RUN: %python -c "for i in range(1, 301): print(f'#include \"lib{i}.h\"')" > lib/lib.h
// RUN: echo "#include \"lib/lib.h\"" > client.c
-// RUN: for i in {1..300}; do echo "#include \"lib/lib$i.h\"" >> client.c; done
+// RUN: %python -c "for i in range(1, 301): print(f'#include \"lib/lib{i}.h\"')" > client.c
//
// We want to verify that we don't hold all the files open at the same time.
// This is important e.g. on mac, which has a low default FD limit.
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 204c985..0dcdb29 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -96,6 +96,7 @@
// CHECK-NOT: __riscv_zfinx {{.*$}}
// CHECK-NOT: __riscv_zhinx {{.*$}}
// CHECK-NOT: __riscv_zhinxmin {{.*$}}
+// CHECK-NOT: __riscv_zibi {{.*$}}
// CHECK-NOT: __riscv_zic64b {{.*$}}
// CHECK-NOT: __riscv_zicbom {{.*$}}
// CHECK-NOT: __riscv_zicbop {{.*$}}
@@ -812,6 +813,14 @@
// RUN: -o - | FileCheck --check-prefix=CHECK-ZHINXMIN-EXT %s
// CHECK-ZHINXMIN-EXT: __riscv_zhinxmin 1000000{{$}}
+// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN: -march=rv32i_zibi0p1 -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-ZIBI-EXT %s
+// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN: -march=rv64i_zibi0p1 -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-ZIBI-EXT %s
+// CHECK-ZIBI-EXT: __riscv_zibi
+
// RUN: %clang --target=riscv32-unknown-linux-gnu \
// RUN: -march=rv32izic64b -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZIC64B-EXT %s
diff --git a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp
index 98e1a9a..df5d8c5 100644
--- a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp
+++ b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp
@@ -1,7 +1,6 @@
// Only run this test where ulimit is known to work well.
// (There's nothing really platform-specific being tested, this is just ulimit).
//
-// REQUIRES: shell
// REQUIRES: system-linux
// UNSUPPORTED: msan
// UNSUPPORTED: asan
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 0ed029c..9526f62 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -1834,19 +1834,6 @@ bool CursorVisitor::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
return VisitNestedNameSpecifierLoc(TL.getQualifierLoc());
}
-bool CursorVisitor::VisitDependentTemplateSpecializationTypeLoc(
- DependentTemplateSpecializationTypeLoc TL) {
- if (VisitNestedNameSpecifierLoc(TL.getQualifierLoc()))
- return true;
-
- // Visit the template arguments.
- for (unsigned I = 0, N = TL.getNumArgs(); I != N; ++I)
- if (VisitTemplateArgumentLoc(TL.getArgLoc(I)))
- return true;
-
- return false;
-}
-
bool CursorVisitor::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
return Visit(TL.getPatternLoc());
}
diff --git a/clang/tools/libclang/CXIndexDataConsumer.cpp b/clang/tools/libclang/CXIndexDataConsumer.cpp
index 423dd1b..932201a 100644
--- a/clang/tools/libclang/CXIndexDataConsumer.cpp
+++ b/clang/tools/libclang/CXIndexDataConsumer.cpp
@@ -393,8 +393,6 @@ SourceLocation CXIndexDataConsumer::CXXBasesListInfo::getBaseLoc(
// TypeLoc::getNameLoc()
if (auto TTL = TL.getAs<DependentNameTypeLoc>())
return TTL.getNameLoc();
- if (auto TTL = TL.getAs<DependentTemplateSpecializationTypeLoc>())
- return TTL.getTemplateNameLoc();
if (auto TTL = TL.getAs<TemplateSpecializationTypeLoc>())
return TTL.getTemplateNameLoc();
if (auto TTL = TL.getAs<TagTypeLoc>())
diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp
index 5badbd7..e7160bc 100644
--- a/clang/unittests/AST/ASTImporterTest.cpp
+++ b/clang/unittests/AST/ASTImporterTest.cpp
@@ -774,8 +774,8 @@ TEST_P(ImportType, ImportDependentTemplateSpecialization) {
" typename A<T>::template B<T> a;"
"};",
Lang_CXX03, "", Lang_CXX03, Verifier,
- classTemplateDecl(has(cxxRecordDecl(has(
- fieldDecl(hasType(dependentTemplateSpecializationType())))))));
+ classTemplateDecl(has(cxxRecordDecl(
+ has(fieldDecl(hasType(templateSpecializationType())))))));
}
TEST_P(ImportType, ImportDeducedTemplateSpecialization) {
@@ -10025,7 +10025,8 @@ protected:
EXPECT_EQ(ToD->getPreviousDecl(), ToDInherited);
} else {
EXPECT_EQ(FromD, FromDInherited->getPreviousDecl());
- EXPECT_EQ(ToD, ToDInherited->getPreviousDecl());
+ // The order is reversed by the import process.
+ EXPECT_EQ(ToD->getPreviousDecl(), ToDInherited);
}
}
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index d7df9ca..9692d6e 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -2031,7 +2031,7 @@ TEST_P(ASTMatchersTest, DependentTemplateSpecializationType) {
typename A<T>::template B<T> a;
};
)",
- dependentTemplateSpecializationType()));
+ templateSpecializationType()));
}
TEST_P(ASTMatchersTest, RecordType) {
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 5e5a7a0..03fff98 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -28,6 +28,7 @@ add_distinct_clang_unittest(FormatTests
MacroExpanderTest.cpp
MatchFilePathTest.cpp
NamespaceEndCommentsFixerTest.cpp
+ NumericLiteralCaseTest.cpp
NumericLiteralInfoTest.cpp
ObjCPropertyAttributeOrderFixerTest.cpp
QualifierFixerTest.cpp
diff --git a/clang/unittests/Format/NumericLiteralCaseTest.cpp b/clang/unittests/Format/NumericLiteralCaseTest.cpp
new file mode 100644
index 0000000..ecd230d
--- /dev/null
+++ b/clang/unittests/Format/NumericLiteralCaseTest.cpp
@@ -0,0 +1,346 @@
+//===- unittest/Format/NumericLiteralCaseTest.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestBase.h"
+
+#define DEBUG_TYPE "numeric-literal-case-test"
+
+namespace clang {
+namespace format {
+namespace test {
+namespace {
+
+class NumericLiteralCaseTest : public FormatTestBase {};
+
+TEST_F(NumericLiteralCaseTest, Prefix) {
+ constexpr StringRef Bin0("b = 0b0'10'010uL;");
+ constexpr StringRef Bin1("b = 0B010'010Ul;");
+ constexpr StringRef Hex0("b = 0xdead'BEEFuL;");
+ constexpr StringRef Hex1("b = 0Xdead'BEEFUl;");
+ verifyFormat(Bin0);
+ verifyFormat(Bin1);
+ verifyFormat(Hex0);
+ verifyFormat(Hex1);
+
+ auto Style = getLLVMStyle();
+ EXPECT_EQ(Style.NumericLiteralCase.Prefix, FormatStyle::NLCS_Leave);
+ EXPECT_EQ(Style.NumericLiteralCase.HexDigit, FormatStyle::NLCS_Leave);
+ EXPECT_EQ(Style.NumericLiteralCase.ExponentLetter, FormatStyle::NLCS_Leave);
+ EXPECT_EQ(Style.NumericLiteralCase.Suffix, FormatStyle::NLCS_Leave);
+
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper;
+ verifyFormat("b = 0B0'10'010uL;", Bin0, Style);
+ verifyFormat(Bin1, Style);
+ verifyFormat("b = 0Xdead'BEEFuL;", Hex0, Style);
+ verifyFormat(Hex1, Style);
+ verifyFormat("i = 0XaBcD.a0Ebp123F;", Style);
+ verifyFormat("j = 0XaBcD.a0EbP123f;", Style);
+
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower;
+ verifyFormat(Bin0, Style);
+ verifyFormat("b = 0b010'010Ul;", Bin1, Style);
+ verifyFormat(Hex0, Style);
+ verifyFormat("b = 0xdead'BEEFUl;", Hex1, Style);
+}
+
+TEST_F(NumericLiteralCaseTest, HexDigit) {
+ constexpr StringRef A("a = 0xaBc0'123fuL;");
+ constexpr StringRef B("b = 0XaBc0'123FUl;");
+ constexpr StringRef C("c = 0xa'Bc.0p12'3f32;");
+ constexpr StringRef D("d = 0xa'Bc.0P12'3F128;");
+ constexpr StringRef E("e = 0b0011'00Ull;");
+ constexpr StringRef F("f = 0B0100'000zu;");
+ constexpr StringRef G("g = 0.123e-19f;");
+ constexpr StringRef H("h = 0.12'3E-19F16;");
+ constexpr StringRef I("i = 0x.0000aBcp12'3F128;");
+ constexpr StringRef J("j = 0xaa1'fP12'3F128;");
+ constexpr StringRef K("k = 0x0;");
+ constexpr StringRef L("l = 0xA;");
+ verifyFormat(A);
+ verifyFormat(B);
+ verifyFormat(C);
+ verifyFormat(D);
+ verifyFormat(E);
+ verifyFormat(F);
+ verifyFormat(G);
+ verifyFormat(H);
+ verifyFormat(I);
+ verifyFormat(J);
+ verifyFormat(K);
+ verifyFormat(L);
+
+ auto Style = getLLVMStyle();
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper;
+ verifyFormat("a = 0xABC0'123FuL;", A, Style);
+ verifyFormat("b = 0XABC0'123FUl;", B, Style);
+ verifyFormat("c = 0xA'BC.0p12'3f32;", C, Style);
+ verifyFormat("d = 0xA'BC.0P12'3F128;", D, Style);
+ verifyFormat(E, Style);
+ verifyFormat(F, Style);
+ verifyFormat(G, Style);
+ verifyFormat(H, Style);
+ verifyFormat("i = 0x.0000ABCp12'3F128;", I, Style);
+ verifyFormat("j = 0xAA1'FP12'3F128;", J, Style);
+ verifyFormat(K, Style);
+ verifyFormat(L, Style);
+
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower;
+ verifyFormat("a = 0xabc0'123fuL;", A, Style);
+ verifyFormat("b = 0Xabc0'123fUl;", B, Style);
+ verifyFormat("c = 0xa'bc.0p12'3f32;", C, Style);
+ verifyFormat("d = 0xa'bc.0P12'3F128;", D, Style);
+ verifyFormat(E, Style);
+ verifyFormat(F, Style);
+ verifyFormat(G, Style);
+ verifyFormat(H, Style);
+ verifyFormat("i = 0x.0000abcp12'3F128;", I, Style);
+ verifyFormat("j = 0xaa1'fP12'3F128;", J, Style);
+ verifyFormat(K, Style);
+ verifyFormat("l = 0xa;", Style);
+}
+
+TEST_F(NumericLiteralCaseTest, ExponentLetter) {
+ constexpr StringRef A("a = .0'01e-19f;");
+ constexpr StringRef B("b = .00'1E2F;");
+ constexpr StringRef C("c = 10'2.e99;");
+ constexpr StringRef D("d = 123.456E-1;");
+ constexpr StringRef E("e = 0x12abEe3.456p-10'0;");
+ constexpr StringRef F("f = 0x.deEfP23;");
+ constexpr StringRef G("g = 0xe0E1.p-1;");
+ verifyFormat(A);
+ verifyFormat(B);
+ verifyFormat(C);
+ verifyFormat(D);
+ verifyFormat(E);
+ verifyFormat(F);
+ verifyFormat(G);
+
+ auto Style = getLLVMStyle();
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower;
+ verifyFormat(A, Style);
+ verifyFormat("b = .00'1e2F;", B, Style);
+ verifyFormat(C, Style);
+ verifyFormat("d = 123.456e-1;", D, Style);
+ verifyFormat(E, Style);
+ verifyFormat("f = 0x.deEfp23;", F, Style);
+ verifyFormat(G, Style);
+
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper;
+ verifyFormat("a = .0'01E-19f;", A, Style);
+ verifyFormat(B, Style);
+ verifyFormat("c = 10'2.E99;", C, Style);
+ verifyFormat(D, Style);
+ verifyFormat("e = 0x12abEe3.456P-10'0;", E, Style);
+ verifyFormat(F, Style);
+ verifyFormat("g = 0xe0E1.P-1;", G, Style);
+}
+
+TEST_F(NumericLiteralCaseTest, IntegerSuffix) {
+ constexpr StringRef A("a = 102u;");
+ constexpr StringRef B("b = 0177U;");
+ constexpr StringRef C("c = 0b101'111llU;");
+ constexpr StringRef D("d = 0xdead'BeefuZ;");
+ constexpr StringRef E("e = 3lU;");
+ constexpr StringRef F("f = 1zu;");
+ constexpr StringRef G("g = 0uLL;");
+ constexpr StringRef H("h = 10'233'213'0101uLL;");
+ verifyFormat(A);
+ verifyFormat(B);
+ verifyFormat(C);
+ verifyFormat(D);
+ verifyFormat(E);
+ verifyFormat(F);
+ verifyFormat(G);
+ verifyFormat(H);
+
+ auto Style = getLLVMStyle();
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower;
+ verifyFormat(A, Style);
+ verifyFormat("b = 0177u;", B, Style);
+ verifyFormat("c = 0b101'111llu;", C, Style);
+ verifyFormat("d = 0xdead'Beefuz;", D, Style);
+ verifyFormat("e = 3lu;", E, Style);
+ verifyFormat(F, Style);
+ verifyFormat("g = 0ull;", G, Style);
+ verifyFormat("h = 10'233'213'0101ull;", H, Style);
+
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper;
+ verifyFormat("a = 102U;", A, Style);
+ verifyFormat(B, Style);
+ verifyFormat("c = 0b101'111LLU;", C, Style);
+ verifyFormat("d = 0xdead'BeefUZ;", D, Style);
+ verifyFormat("e = 3LU;", E, Style);
+ verifyFormat("f = 1ZU;", F, Style);
+ verifyFormat("g = 0ULL;", G, Style);
+ verifyFormat("h = 10'233'213'0101ULL;", H, Style);
+}
+
+TEST_F(NumericLiteralCaseTest, FloatingPointSuffix) {
+ auto Style = getLLVMStyle();
+ // Floating point literals without suffixes.
+ constexpr std::array<StringRef, 6> FloatingPointStatements = {
+ "a = 0.", "b = 1.0", "c = .123'45E-10",
+ "d = 12'3.0e1", "e = 0Xa0eE.P10", "f = 0xeE01.aFf3p6",
+ };
+
+ // All legal floating-point literal suffixes defined in the C++23 standard in
+ // lowercase.
+ constexpr std::array<StringRef, 7> FloatingPointSuffixes = {
+ "f", "l", "f16", "f32", "f64", "f128", "bf16",
+ };
+
+ // Test all combinations of literals with suffixes.
+ for (const auto &Statement : FloatingPointStatements) {
+ for (const auto &Suffix : FloatingPointSuffixes) {
+ const auto LowerLine = Statement.str() + Suffix.str() + ";";
+ const auto UpperLine = Statement.str() + Suffix.upper() + ";";
+
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Leave;
+ verifyFormat(LowerLine, Style);
+ verifyFormat(UpperLine, Style);
+
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower;
+ verifyFormat(LowerLine, Style);
+ verifyFormat(LowerLine, UpperLine, Style);
+
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper;
+ verifyFormat(UpperLine, LowerLine, Style);
+ verifyFormat(UpperLine, Style);
+ }
+ }
+}
+
+TEST_F(NumericLiteralCaseTest, CppStandardAndUserDefinedLiteralsAreUntouched) {
+ auto Style = getLLVMStyle();
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper;
+
+ // C++ user-defined suffixes begin with '_' or are reserved for the standard
+ // library.
+ constexpr StringRef UDLiterals("a = 12.if;\n"
+ "b = -3i;\n"
+ "c = 100'01il;\n"
+ "d = 100'0.12il;\n"
+ "e = 12h;\n"
+ "f = 0XABE12h;\n"
+ "g = 0XFA03min;\n"
+ "h = 0X12B4Ds;\n"
+ "i = 20.13E-1ms;\n"
+ "j = 20.13E-1us;\n"
+ "k = 20.13E-1ns;\n"
+ "l = 20.13E-1y;\n"
+ "m = 20.13E-1d;\n"
+ "n = 20.13E-1d;\n"
+ "o = 1d;\n"
+ "p = 102_ffl_lzlz;\n"
+ "q = 10.2_l;\n"
+ "r = 0XABDE.0'1P-23_f;\n"
+ "s = 102_foo_bar;\n"
+ "t = 123.456_felfz_ballpen;\n"
+ "u = 0XBEAD1_spacebar;");
+
+ verifyFormat(UDLiterals, Style);
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower;
+ verifyFormat(UDLiterals, Style);
+}
+
+TEST_F(NumericLiteralCaseTest, FixRanges) {
+ auto Style = getLLVMStyle();
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower;
+
+ constexpr StringRef CodeBlock("a = 0xFea3duLL;\n"
+ "b = 0X.aEbp-12f;\n"
+ "c = 0uLL;\n"
+ "// clang-format off\n"
+ "e = 0xBeAdu;\n"
+ "// clang-format on\n"
+ "g = 0xabCDu;\n"
+ "h = 0b010uL;\n"
+ "// clang-format off\n"
+ "i = 0B1010'000Zu;\n"
+ "// clang-format on\n"
+ "k = 0XaBuL;");
+
+ verifyFormat("a = 0xfea3dull;\n"
+ "b = 0x.aebp-12f;\n"
+ "c = 0ull;\n"
+ "// clang-format off\n"
+ "e = 0xBeAdu;\n"
+ "// clang-format on\n"
+ "g = 0xabcdu;\n"
+ "h = 0b010ul;\n"
+ "// clang-format off\n"
+ "i = 0B1010'000Zu;\n"
+ "// clang-format on\n"
+ "k = 0xabul;",
+ CodeBlock, Style);
+}
+
+TEST_F(NumericLiteralCaseTest, UnderScoreSeparatorLanguages) {
+ auto Style = getLLVMStyle();
+
+ constexpr StringRef CodeBlock("a = 0xFea_3dl;\n"
+ "b = 0123_345;\n"
+ "c = 0b11____00lU;\n"
+ "d = 0XB_e_A_du;\n"
+ "e = 123_456.333__456e-10f;\n"
+ "f = .1_0E-10D;\n"
+ "g = 1_0.F;\n"
+ "h = 0B1_0;");
+ auto TestUnderscore = [&](auto Language) {
+ Style.Language = Language;
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper;
+ verifyFormat("a = 0xFEA_3DL;\n"
+ "b = 0123_345;\n"
+ "c = 0b11____00LU;\n"
+ "d = 0xB_E_A_DU;\n"
+ "e = 123_456.333__456e-10F;\n"
+ "f = .1_0e-10D;\n"
+ "g = 1_0.F;\n"
+ "h = 0b1_0;",
+ CodeBlock, Style);
+
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower;
+ Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper;
+ Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower;
+
+ verifyFormat("a = 0Xfea_3dl;\n"
+ "b = 0123_345;\n"
+ "c = 0B11____00lu;\n"
+ "d = 0Xb_e_a_du;\n"
+ "e = 123_456.333__456E-10f;\n"
+ "f = .1_0E-10d;\n"
+ "g = 1_0.f;\n"
+ "h = 0B1_0;",
+ CodeBlock, Style);
+ };
+
+ TestUnderscore(FormatStyle::LK_CSharp);
+ TestUnderscore(FormatStyle::LK_Java);
+ TestUnderscore(FormatStyle::LK_JavaScript);
+
+ Style.Language = FormatStyle::LK_JavaScript;
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper;
+ verifyFormat("o = 0O0_10_010;", "o = 0o0_10_010;", Style);
+ Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower;
+ verifyFormat("o = 0o0_10_010;", "o = 0O0_10_010;", Style);
+}
+
+} // namespace
+} // namespace test
+} // namespace format
+} // namespace clang
diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index bb7144b..25940cc 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -280,12 +280,7 @@ C++23, informally referred to as C++26.</p>
<tr>
<td>Trivial Relocatability</pre></td>
<td><a href="https://wg21.link/P2786">P2786R13</a></td>
- <td class="partial" align="center">
- <details>
- <summary>Clang 21 (Partial)</summary>
- The feature test macro (<code>__cpp_trivial_relocatability</code>) has not yet been set.
- </details>
- </td>
+ <td class="unreleased" align="center">Clang 21</td>
</tr>
<tr>
<td><pre>#embed</pre></td>
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index d5510ac..1713a59 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -280,17 +280,17 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
mov w16, #1
str x16, [x0]
- add x18, x0, #32
+ add x16, x0, #32
tbz x17, #FEAT_SME2_BIT, 1f
// Store ZT0
- str zt0, [x18]
- add x18, x18, #64
+ str zt0, [x16]
+ add x16, x16, #64
1:
- // Set up lazy-save (x18 = pointer to buffer)
+ // Set up lazy-save (x16 = pointer to buffer)
rdsvl x17, #1
- str x18, [x0, #16]!
+ str x16, [x0, #16]!
strh w17, [x0, #8]
strh wzr, [x0, #10]
str wzr, [x0, #12]
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 7734491..e2e8154 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -713,9 +713,9 @@ if config.target_os == "Linux":
if config.android:
return
- from packaging.version import Version
+ from distutils.version import LooseVersion
- ver = Version(ver_string)
+ ver = LooseVersion(ver_string)
any_glibc = False
for required in [
"2.19",
@@ -727,7 +727,7 @@ if config.target_os == "Linux":
"2.38",
"2.40",
]:
- if ver >= Version(required):
+ if ver >= LooseVersion(required):
config.available_features.add("glibc-" + required)
any_glibc = True
if any_glibc:
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 8e9de41..0ffe27e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -271,6 +271,9 @@ public:
virtual const Fortran::lower::pft::FunctionLikeUnit *
getCurrentFunctionUnit() const = 0;
+ /// Check support of Multi-image features if -fcoarray is provided
+ virtual void checkCoarrayEnabled() = 0;
+
//===--------------------------------------------------------------------===//
// Types
//===--------------------------------------------------------------------===//
diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h
index 6388468..18e2f20 100644
--- a/flang/include/flang/Lower/OpenMP/Clauses.h
+++ b/flang/include/flang/Lower/OpenMP/Clauses.h
@@ -277,6 +277,7 @@ using Read = tomp::clause::ReadT<TypeTy, IdTy, ExprTy>;
using Reduction = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
using Relaxed = tomp::clause::RelaxedT<TypeTy, IdTy, ExprTy>;
using Release = tomp::clause::ReleaseT<TypeTy, IdTy, ExprTy>;
+using Replayable = tomp::clause::ReplayableT<TypeTy, IdTy, ExprTy>;
using ReverseOffload = tomp::clause::ReverseOffloadT<TypeTy, IdTy, ExprTy>;
using Safelen = tomp::clause::SafelenT<TypeTy, IdTy, ExprTy>;
using Schedule = tomp::clause::ScheduleT<TypeTy, IdTy, ExprTy>;
@@ -290,6 +291,7 @@ using Permutation = tomp::clause::PermutationT<TypeTy, IdTy, ExprTy>;
using TaskReduction = tomp::clause::TaskReductionT<TypeTy, IdTy, ExprTy>;
using ThreadLimit = tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy>;
using Threads = tomp::clause::ThreadsT<TypeTy, IdTy, ExprTy>;
+using Transparent = tomp::clause::TransparentT<TypeTy, IdTy, ExprTy>;
using To = tomp::clause::ToT<TypeTy, IdTy, ExprTy>;
using UnifiedAddress = tomp::clause::UnifiedAddressT<TypeTy, IdTy, ExprTy>;
using UnifiedSharedMemory =
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 3c020ab..d80ee9e 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -573,15 +573,6 @@ struct IntrinsicLibrary {
void setResultMustBeFreed() { resultMustBeFreed = true; }
- // Check support of coarray features
- void checkCoarrayEnabled() {
- if (converter &&
- !converter->getFoldingContext().languageFeatures().IsEnabled(
- Fortran::common::LanguageFeature::Coarray))
- fir::emitFatalError(loc, "Coarrays disabled, use '-fcoarray' to enable.",
- false);
- }
-
fir::FirOpBuilder &builder;
mlir::Location loc;
bool resultMustBeFreed = false;
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
index 10ed503..20bfb7c 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
@@ -71,5 +71,15 @@ void genCoMin(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A,
void genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A,
mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg);
+/// Generate call to runtime subroutine prif_sync_all
+void genSyncAllStatement(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value stat, mlir::Value errmsg);
+/// Generate call to runtime subroutine prif_sync_memory
+void genSyncMemoryStatement(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value stat, mlir::Value errmsg);
+/// Generate call to runtime subroutine prif_sync_images
+void genSyncImagesStatement(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value imageSet, mlir::Value stat,
+ mlir::Value errmsg);
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index d2ab7cb..1c9fd76 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -658,6 +658,7 @@ public:
NODE(parser, OmpReductionSpecifier)
NODE(parser, OmpRefModifier)
NODE_ENUM(OmpRefModifier, Value)
+ NODE(parser, OmpReplayableClause)
NODE(parser, OmpScheduleClause)
NODE(OmpScheduleClause, Modifier)
NODE_ENUM(OmpScheduleClause, Kind)
@@ -686,6 +687,7 @@ public:
NODE(parser, OmpTraitSetSelector)
NODE(parser, OmpTraitSetSelectorName)
NODE_ENUM(OmpTraitSetSelectorName, Value)
+ NODE(parser, OmpTransparentClause)
NODE(parser, OmpTypeNameList)
NODE(parser, OmpTypeSpecifier)
NODE(parser, OmpUpdateClause)
diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index 3d3dfae..8205d25 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -155,6 +155,8 @@ template <typename T> OmpDirectiveName GetOmpDirectiveName(const T &x) {
}
const OmpObjectList *GetOmpObjectList(const OmpClause &clause);
+const BlockConstruct *GetFortranBlockConstruct(
+ const ExecutionPartConstruct &epc);
} // namespace Fortran::parser::omp
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 622b5f9..951c96b 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4643,6 +4643,14 @@ struct OmpReductionClause {
std::tuple<MODIFIERS(), OmpObjectList> t;
};
+// Ref: [6.0:440:441]
+//
+// replayable-clause ->
+// REPLAYABLE[(replayable-expression)] // since 6.0
+struct OmpReplayableClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpReplayableClause, Scalar<Logical<ConstantExpr>>);
+};
+
// Ref: [4.5:56-63], [5.0:101-109], [5.1:126-133], [5.2:252-254]
//
// schedule-clause ->
@@ -4692,6 +4700,14 @@ struct OmpToClause {
std::tuple<MODIFIERS(), OmpObjectList, /*CommaSeparated=*/bool> t;
};
+// Ref: [6.0:510-511]
+//
+// transparent-clause ->
+// TRANSPARENT[(impex-type)] // since 6.0
+struct OmpTransparentClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpTransparentClause, ScalarIntExpr);
+};
+
// Ref: [5.0:254-255], [5.1:287-288], [5.2:321-322]
//
// In ATOMIC construct
diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index 1c54124..68318d6 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -83,6 +83,7 @@ const SomeExpr *HasStorageOverlap(
bool IsAssignment(const parser::ActionStmt *x);
bool IsPointerAssignment(const evaluate::Assignment &x);
const parser::Block &GetInnermostExecPart(const parser::Block &block);
+bool IsStrictlyStructuredBlock(const parser::Block &block);
} // namespace omp
} // namespace Fortran::semantics
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6125ea9..4a5b988 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1131,6 +1131,16 @@ public:
return currentFunctionUnit;
}
+ void checkCoarrayEnabled() override final {
+ if (!getFoldingContext().languageFeatures().IsEnabled(
+ Fortran::common::LanguageFeature::Coarray))
+ fir::emitFatalError(
+ getCurrentLocation(),
+ "Not yet implemented: Multi-image features are experimental and are "
+ "disabled by default, use '-fcoarray' to enable.",
+ false);
+ }
+
void registerTypeInfo(mlir::Location loc,
Fortran::lower::SymbolRef typeInfoSym,
const Fortran::semantics::DerivedTypeSpec &typeSpec,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index cecc1a9..78fe5aa 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -239,11 +239,11 @@ MAKE_EMPTY_CLASS(Relaxed, Relaxed);
MAKE_EMPTY_CLASS(Release, Release);
MAKE_EMPTY_CLASS(ReverseOffload, ReverseOffload);
MAKE_EMPTY_CLASS(SeqCst, SeqCst);
+MAKE_EMPTY_CLASS(SelfMaps, SelfMaps);
MAKE_EMPTY_CLASS(Simd, Simd);
MAKE_EMPTY_CLASS(Threads, Threads);
MAKE_EMPTY_CLASS(UnifiedAddress, UnifiedAddress);
MAKE_EMPTY_CLASS(UnifiedSharedMemory, UnifiedSharedMemory);
-MAKE_EMPTY_CLASS(SelfMaps, SelfMaps);
MAKE_EMPTY_CLASS(Unknown, Unknown);
MAKE_EMPTY_CLASS(Untied, Untied);
MAKE_EMPTY_CLASS(Weak, Weak);
@@ -257,6 +257,8 @@ MAKE_EMPTY_CLASS(Threadprivate, Threadprivate);
MAKE_INCOMPLETE_CLASS(AdjustArgs, AdjustArgs);
MAKE_INCOMPLETE_CLASS(AppendArgs, AppendArgs);
+MAKE_INCOMPLETE_CLASS(Replayable, Replayable);
+MAKE_INCOMPLETE_CLASS(Transparent, Transparent);
List<IteratorSpecifier>
makeIteratorSpecifiers(const parser::OmpIteratorSpecifier &inp,
diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp
index 494dd49..b19ca01 100644
--- a/flang/lib/Lower/Runtime.cpp
+++ b/flang/lib/Lower/Runtime.cpp
@@ -12,6 +12,7 @@
#include "flang/Lower/OpenMP.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/Coarray.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
@@ -47,6 +48,42 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) {
builder.setInsertionPointToStart(newBlock);
}
+/// Initializes values for STAT and ERRMSG
+static std::pair<mlir::Value, mlir::Value> getStatAndErrmsg(
+ Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+ const std::list<Fortran::parser::StatOrErrmsg> &statOrErrList) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ Fortran::lower::StatementContext stmtCtx;
+
+ mlir::Value errMsgExpr, statExpr;
+ for (const Fortran::parser::StatOrErrmsg &statOrErr : statOrErrList) {
+ std::visit(Fortran::common::visitors{
+ [&](const Fortran::parser::StatVariable &statVar) {
+ statExpr = fir::getBase(converter.genExprAddr(
+ loc, Fortran::semantics::GetExpr(statVar), stmtCtx));
+ },
+ [&](const Fortran::parser::MsgVariable &errMsgVar) {
+ const Fortran::semantics::SomeExpr *expr =
+ Fortran::semantics::GetExpr(errMsgVar);
+ errMsgExpr = fir::getBase(
+ converter.genExprBox(loc, *expr, stmtCtx));
+ }},
+ statOrErr.u);
+ }
+
+ if (!statExpr) {
+ statExpr = fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()));
+ }
+ if (!errMsgExpr) {
+ errMsgExpr = fir::AbsentOp::create(
+ builder, loc,
+ fir::BoxType::get(fir::CharacterType::get(
+ builder.getContext(), 1, fir::CharacterType::unknownLen())));
+ }
+ return {statExpr, errMsgExpr};
+}
+
//===----------------------------------------------------------------------===//
// Misc. Fortran statements that lower to runtime calls
//===----------------------------------------------------------------------===//
@@ -169,20 +206,68 @@ void Fortran::lower::genUnlockStatement(
void Fortran::lower::genSyncAllStatement(
Fortran::lower::AbstractConverter &converter,
- const Fortran::parser::SyncAllStmt &) {
- TODO(converter.getCurrentLocation(), "coarray: SYNC ALL runtime");
+ const Fortran::parser::SyncAllStmt &stmt) {
+ mlir::Location loc = converter.getCurrentLocation();
+ converter.checkCoarrayEnabled();
+
+ // Handle STAT and ERRMSG values
+ const std::list<Fortran::parser::StatOrErrmsg> &statOrErrList = stmt.v;
+ auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList);
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ fir::runtime::genSyncAllStatement(builder, loc, statAddr, errMsgAddr);
}
void Fortran::lower::genSyncImagesStatement(
Fortran::lower::AbstractConverter &converter,
- const Fortran::parser::SyncImagesStmt &) {
- TODO(converter.getCurrentLocation(), "coarray: SYNC IMAGES runtime");
+ const Fortran::parser::SyncImagesStmt &stmt) {
+ mlir::Location loc = converter.getCurrentLocation();
+ converter.checkCoarrayEnabled();
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+ // Handle STAT and ERRMSG values
+ const std::list<Fortran::parser::StatOrErrmsg> &statOrErrList =
+ std::get<std::list<Fortran::parser::StatOrErrmsg>>(stmt.t);
+ auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList);
+
+  // SYNC IMAGES(*) is passed as count == -1 while SYNC IMAGES([]) has count
+  // == 0. Note further that SYNC IMAGES(*) is not semantically equivalent to
+  // SYNC ALL.
+ Fortran::lower::StatementContext stmtCtx;
+ mlir::Value imageSet;
+ const Fortran::parser::SyncImagesStmt::ImageSet &imgSet =
+ std::get<Fortran::parser::SyncImagesStmt::ImageSet>(stmt.t);
+ std::visit(Fortran::common::visitors{
+ [&](const Fortran::parser::IntExpr &intExpr) {
+ const SomeExpr *expr = Fortran::semantics::GetExpr(intExpr);
+ imageSet =
+ fir::getBase(converter.genExprBox(loc, *expr, stmtCtx));
+ },
+ [&](const Fortran::parser::Star &) {
+ imageSet = fir::AbsentOp::create(
+ builder, loc,
+ fir::BoxType::get(fir::SequenceType::get(
+ {fir::SequenceType::getUnknownExtent()},
+ builder.getI32Type())));
+ }},
+ imgSet.u);
+
+ fir::runtime::genSyncImagesStatement(builder, loc, imageSet, statAddr,
+ errMsgAddr);
}
void Fortran::lower::genSyncMemoryStatement(
Fortran::lower::AbstractConverter &converter,
- const Fortran::parser::SyncMemoryStmt &) {
- TODO(converter.getCurrentLocation(), "coarray: SYNC MEMORY runtime");
+ const Fortran::parser::SyncMemoryStmt &stmt) {
+ mlir::Location loc = converter.getCurrentLocation();
+ converter.checkCoarrayEnabled();
+
+ // Handle STAT and ERRMSG values
+ const std::list<Fortran::parser::StatOrErrmsg> &statOrErrList = stmt.v;
+ auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList);
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ fir::runtime::genSyncMemoryStatement(builder, loc, statAddr, errMsgAddr);
}
void Fortran::lower::genSyncTeamStatement(
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 6ae48c1..aa12dbf 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3716,7 +3716,7 @@ mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
// CO_BROADCAST
void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() == 4);
mlir::Value sourceImage = fir::getBase(args[1]);
mlir::Value status =
@@ -3735,7 +3735,7 @@ void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue> args) {
// CO_MAX
void IntrinsicLibrary::genCoMax(llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() == 4);
mlir::Value refNone =
fir::AbsentOp::create(builder, loc,
@@ -3755,7 +3755,7 @@ void IntrinsicLibrary::genCoMax(llvm::ArrayRef<fir::ExtendedValue> args) {
// CO_MIN
void IntrinsicLibrary::genCoMin(llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() == 4);
mlir::Value refNone =
fir::AbsentOp::create(builder, loc,
@@ -3775,7 +3775,7 @@ void IntrinsicLibrary::genCoMin(llvm::ArrayRef<fir::ExtendedValue> args) {
// CO_SUM
void IntrinsicLibrary::genCoSum(llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() == 4);
mlir::Value absentInt =
fir::AbsentOp::create(builder, loc,
@@ -7438,7 +7438,7 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
fir::ExtendedValue
IntrinsicLibrary::genNumImages(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() == 0 || args.size() == 1);
if (args.size())
@@ -8519,7 +8519,7 @@ mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType,
fir::ExtendedValue
IntrinsicLibrary::genThisImage(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
- checkCoarrayEnabled();
+ converter->checkCoarrayEnabled();
assert(args.size() >= 1 && args.size() <= 3);
const bool coarrayIsAbsent = args.size() == 1;
mlir::Value team =
diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
index 9a893d6..364e7b7 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
@@ -165,3 +165,64 @@ void fir::runtime::genCoSum(fir::FirOpBuilder &builder, mlir::Location loc,
genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
PRIFNAME_SUB("co_sum"));
}
+
+/// Generate call to runtime subroutine prif_sync_all
+void fir::runtime::genSyncAllStatement(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value stat,
+ mlir::Value errmsg) {
+ mlir::FunctionType ftype =
+ PRIF_FUNCTYPE(PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE);
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, PRIFNAME_SUB("sync_all"), ftype);
+
+ auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg);
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp::create(builder, loc, funcOp, args);
+}
+
+/// Generate call to runtime subroutine prif_sync_memory
+void fir::runtime::genSyncMemoryStatement(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value stat,
+ mlir::Value errmsg) {
+ mlir::FunctionType ftype =
+ PRIF_FUNCTYPE(PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE);
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, PRIFNAME_SUB("sync_memory"), ftype);
+
+ auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg);
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp::create(builder, loc, funcOp, args);
+}
+
+/// Generate call to runtime subroutine prif_sync_images
+void fir::runtime::genSyncImagesStatement(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value imageSet,
+ mlir::Value stat,
+ mlir::Value errmsg) {
+ mlir::Type imgSetTy = fir::BoxType::get(fir::SequenceType::get(
+ {fir::SequenceType::getUnknownExtent()}, builder.getI32Type()));
+ mlir::FunctionType ftype = PRIF_FUNCTYPE(imgSetTy, PRIF_STAT_TYPE,
+ PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE);
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, PRIFNAME_SUB("sync_images"), ftype);
+
+  // If imageSet is scalar, PRIF requires passing an array of size 1.
+ if (auto boxTy = mlir::dyn_cast<fir::BoxType>(imageSet.getType())) {
+ if (!mlir::isa<fir::SequenceType>(boxTy.getEleTy())) {
+ mlir::Value one =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 1);
+ mlir::Value shape = fir::ShapeOp::create(builder, loc, one);
+ imageSet = fir::ReboxOp::create(
+ builder, loc,
+ fir::BoxType::get(fir::SequenceType::get({1}, builder.getI32Type())),
+ imageSet, shape, mlir::Value{});
+ }
+ }
+ auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg);
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, imageSet, stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp::create(builder, loc, funcOp, args);
+}
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 68e0acd..519bce6 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -15,6 +15,7 @@
#include "stmt-parser.h"
#include "token-parsers.h"
#include "type-parser-implementation.h"
+#include "flang/Parser/openmp-utils.h"
#include "flang/Parser/parse-tree.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
@@ -24,6 +25,7 @@
// OpenMP Directives and Clauses
namespace Fortran::parser {
+using namespace Fortran::parser::omp;
// Helper function to print the buffer contents starting at the current point.
[[maybe_unused]] static std::string ahead(const ParseState &state) {
@@ -34,6 +36,9 @@ namespace Fortran::parser {
constexpr auto startOmpLine = skipStuffBeforeStatement >> "!$OMP "_sptok;
constexpr auto endOmpLine = space >> endOfLine;
+constexpr auto logicalConstantExpr{logical(constantExpr)};
+constexpr auto scalarLogicalConstantExpr{scalar(logicalConstantExpr)};
+
// Given a parser for a single element, and a parser for a list of elements
// of the same type, create a parser that constructs the entire list by having
// the single element be the head of the list, and the rest be the tail.
@@ -868,6 +873,8 @@ TYPE_PARSER(construct<OmpReductionClause>(
maybe(nonemptyList(Parser<OmpReductionClause::Modifier>{}) / ":"),
Parser<OmpObjectList>{}))
+TYPE_PARSER(construct<OmpReplayableClause>(scalarLogicalConstantExpr))
+
// OMP 5.0 2.19.5.6 IN_REDUCTION (reduction-identifier: variable-name-list)
TYPE_PARSER(construct<OmpInReductionClause>(
maybe(nonemptyList(Parser<OmpInReductionClause::Modifier>{}) / ":"),
@@ -877,6 +884,8 @@ TYPE_PARSER(construct<OmpTaskReductionClause>(
maybe(nonemptyList(Parser<OmpTaskReductionClause::Modifier>{}) / ":"),
Parser<OmpObjectList>{}))
+TYPE_PARSER(construct<OmpTransparentClause>(scalarIntExpr))
+
// OMP 5.0 2.11.4 allocate-clause -> ALLOCATE ([allocator:] variable-name-list)
// OMP 5.2 2.13.4 allocate-clause -> ALLOCATE ([allocate-modifier
// [, allocate-modifier] :]
@@ -1192,6 +1201,8 @@ TYPE_PARSER( //
"READ" >> construct<OmpClause>(construct<OmpClause::Read>()) ||
"RELAXED" >> construct<OmpClause>(construct<OmpClause::Relaxed>()) ||
"RELEASE" >> construct<OmpClause>(construct<OmpClause::Release>()) ||
+ "REPLAYABLE" >> construct<OmpClause>(construct<OmpClause::Replayable>(
+ maybe(parenthesized(Parser<OmpReplayableClause>{})))) ||
"REVERSE_OFFLOAD" >>
construct<OmpClause>(construct<OmpClause::ReverseOffload>()) ||
"SAFELEN" >> construct<OmpClause>(construct<OmpClause::Safelen>(
@@ -1215,6 +1226,9 @@ TYPE_PARSER( //
parenthesized(scalarIntExpr))) ||
"TO" >> construct<OmpClause>(construct<OmpClause::To>(
parenthesized(Parser<OmpToClause>{}))) ||
+ "TRANSPARENT" >>
+ construct<OmpClause>(construct<OmpClause::Transparent>(
+ maybe(parenthesized(Parser<OmpTransparentClause>{})))) ||
"USE" >> construct<OmpClause>(construct<OmpClause::Use>(
parenthesized(Parser<OmpObject>{}))) ||
"USE_DEVICE_PTR" >> construct<OmpClause>(construct<OmpClause::UseDevicePtr>(
@@ -1280,16 +1294,6 @@ TYPE_PARSER(sourced(
maybe(Parser<OmpClauseList>{}),
pure(OmpDirectiveSpecification::Flags::None))))
-static bool IsFortranBlockConstruct(const ExecutionPartConstruct &epc) {
- // ExecutionPartConstruct -> ExecutableConstruct
- // -> Indirection<BlockConstruct>
- if (auto *ec{std::get_if<ExecutableConstruct>(&epc.u)}) {
- return std::holds_alternative<common::Indirection<BlockConstruct>>(ec->u);
- } else {
- return false;
- }
-}
-
static bool IsStandaloneOrdered(const OmpDirectiveSpecification &dirSpec) {
// An ORDERED construct is standalone if it has DOACROSS or DEPEND clause.
return dirSpec.DirId() == llvm::omp::Directive::OMPD_ordered &&
@@ -1307,7 +1311,7 @@ struct StrictlyStructuredBlockParser {
// Detect BLOCK construct without parsing the entire thing.
if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) {
if (auto epc{Parser<ExecutionPartConstruct>{}.Parse(state)}) {
- if (IsFortranBlockConstruct(*epc)) {
+ if (GetFortranBlockConstruct(*epc) != nullptr) {
Block body;
body.emplace_back(std::move(*epc));
return std::move(body);
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index ef7e4fc..937a17f 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -12,6 +12,7 @@
#include "flang/Parser/openmp-utils.h"
+#include "flang/Common/indirection.h"
#include "flang/Common/template.h"
#include "flang/Common/visit.h"
@@ -61,4 +62,16 @@ const OmpObjectList *GetOmpObjectList(const OmpClause &clause) {
clause.u);
}
+const BlockConstruct *GetFortranBlockConstruct(
+ const ExecutionPartConstruct &epc) {
+ // ExecutionPartConstruct -> ExecutableConstruct
+ // -> Indirection<BlockConstruct>
+ if (auto *ec{std::get_if<ExecutableConstruct>(&epc.u)}) {
+ if (auto *ind{std::get_if<common::Indirection<BlockConstruct>>(&ec->u)}) {
+ return &ind->value();
+ }
+ }
+ return nullptr;
+}
+
} // namespace Fortran::parser::omp
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index d1654a3..6bc9f99 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -874,22 +874,8 @@ void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) {
// Missing mandatory end block: this is checked in semantics because that
// makes it easier to control the error messages.
// The end block is mandatory when the construct is not applied to a strictly
- // structured block (aka it is applied to a loosely structured block). In
- // other words, the body doesn't contain exactly one parser::BlockConstruct.
- auto isStrictlyStructuredBlock{[](const parser::Block &block) -> bool {
- if (block.size() != 1) {
- return false;
- }
- const parser::ExecutionPartConstruct &contents{block.front()};
- auto *executableConstruct{
- std::get_if<parser::ExecutableConstruct>(&contents.u)};
- if (!executableConstruct) {
- return false;
- }
- return std::holds_alternative<common::Indirection<parser::BlockConstruct>>(
- executableConstruct->u);
- }};
- if (!endSpec && !isStrictlyStructuredBlock(block)) {
+ // structured block (aka it is applied to a loosely structured block).
+ if (!endSpec && !IsStrictlyStructuredBlock(block)) {
llvm::omp::Directive dirId{beginSpec.DirId()};
auto &msg{context_.Say(beginSpec.source,
"Expected OpenMP END %s directive"_err_en_US,
@@ -2845,6 +2831,8 @@ CHECK_SIMPLE_CLAUSE(AcqRel, OMPC_acq_rel)
CHECK_SIMPLE_CLAUSE(Acquire, OMPC_acquire)
CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed)
CHECK_SIMPLE_CLAUSE(Release, OMPC_release)
+CHECK_SIMPLE_CLAUSE(Replayable, OMPC_replayable)
+CHECK_SIMPLE_CLAUSE(Transparent, OMPC_transparent)
CHECK_SIMPLE_CLAUSE(SeqCst, OMPC_seq_cst)
CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail)
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index e8df346c..2980f82 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -21,6 +21,7 @@
#include "flang/Evaluate/traverse.h"
#include "flang/Evaluate/type.h"
#include "flang/Evaluate/variable.h"
+#include "flang/Parser/openmp-utils.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/expression.h"
#include "flang/Semantics/semantics.h"
@@ -37,6 +38,7 @@
#include <vector>
namespace Fortran::semantics::omp {
+using namespace Fortran::parser::omp;
SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x) {
if (x == nullptr) {
@@ -397,16 +399,21 @@ const parser::Block &GetInnermostExecPart(const parser::Block &block) {
const parser::Block *iter{&block};
while (iter->size() == 1) {
const parser::ExecutionPartConstruct &ep{iter->front()};
- if (auto *exec{std::get_if<parser::ExecutableConstruct>(&ep.u)}) {
- using BlockConstruct = common::Indirection<parser::BlockConstruct>;
- if (auto *bc{std::get_if<BlockConstruct>(&exec->u)}) {
- iter = &std::get<parser::Block>(bc->value().t);
- continue;
- }
+ if (auto *bc{GetFortranBlockConstruct(ep)}) {
+ iter = &std::get<parser::Block>(bc->t);
+ } else {
+ break;
}
- break;
}
return *iter;
}
+bool IsStrictlyStructuredBlock(const parser::Block &block) {
+ if (block.size() == 1) {
+ return GetFortranBlockConstruct(block.front()) != nullptr;
+ } else {
+ return false;
+ }
+}
+
} // namespace Fortran::semantics::omp
diff --git a/flang/test/Lower/Coarray/sync_all.f90 b/flang/test/Lower/Coarray/sync_all.f90
new file mode 100644
index 0000000..c2c12d8
--- /dev/null
+++ b/flang/test/Lower/Coarray/sync_all.f90
@@ -0,0 +1,37 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY
+! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY
+
+program test_sync_all
+ implicit none
+ ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable.
+
+ ! COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref<!fir.char<1,128>>, index) -> (!fir.ref<!fir.char<1,128>>, !fir.ref<!fir.char<1,128>>)
+ ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ integer sync_status
+ character(len=128) :: error_message
+
+ ! COARRAY: %[[VAL_3:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_5:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync all
+
+ ! COARRAY: %[[VAL_6:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_7:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[STAT]]#0, %[[VAL_6]], %[[VAL_7]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync all(stat=sync_status)
+
+ ! COARRAY: %[[VAL_8:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_9:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_10:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_11:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[VAL_9]], %[[VAL_11]], %[[VAL_10]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync all( errmsg=error_message)
+
+ ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[STAT]]#0, %[[VAL_14]], %[[VAL_13]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync all(stat=sync_status, errmsg=error_message)
+
+end program test_sync_all
diff --git a/flang/test/Lower/Coarray/sync_images.f90 b/flang/test/Lower/Coarray/sync_images.f90
new file mode 100644
index 0000000..0224bf2
--- /dev/null
+++ b/flang/test/Lower/Coarray/sync_images.f90
@@ -0,0 +1,62 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY
+! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY
+
+program test_sync_images
+ implicit none
+ ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable.
+
+ ! COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref<!fir.char<1,128>>, index) -> (!fir.ref<!fir.char<1,128>>, !fir.ref<!fir.char<1,128>>)
+ ! COARRAY: %[[ME:.*]]:2 = hlfir.declare %[[VAL_3:.*]] {uniq_name = "_QFEme"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ integer sync_status, me
+ character(len=128) :: error_message
+
+ ! COARRAY: %[[VAL_1:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_2:.*]] = fir.absent !fir.box<!fir.array<?xi32>>
+ ! COARRAY: %[[VAL_3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_4:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_2]], %[[STAT]]#0, %[[VAL_4]], %[[VAL_3]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images(*, stat=sync_status, errmsg=error_message)
+
+ ! COARRAY: %[[VAL_5:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_6:.*]] = fir.embox %[[ME]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! COARRAY: %[[VAL_7:.*]] = fir.rebox %[[VAL_6]](%[[SHAPE:.*]]) : (!fir.box<i32>, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
+ ! COARRAY: %[[VAL_8:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_9:.*]] = fir.convert %[[VAL_7]] : (!fir.box<!fir.array<1xi32>>) -> !fir.box<!fir.array<?xi32>>
+ ! COARRAY: %[[VAL_10:.*]] = fir.convert %[[VAL_5]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_9]], %[[STAT]]#0, %[[VAL_10]], %[[VAL_8]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images(me, stat=sync_status, errmsg=error_message)
+
+ ! COARRAY: %[[VAL_11:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[IMG_SET:.*]]#0(%[[SHAPE_1:.*]]) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
+ ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box<!fir.array<1xi32>>) -> !fir.box<!fir.array<?xi32>>
+ ! COARRAY: %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_14]], %[[STAT]]#0, %[[VAL_15]], %[[VAL_13]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images([1], stat=sync_status, errmsg=error_message)
+
+ ! COARRAY: %[[VAL_17:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_18:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_19:.*]] = fir.absent !fir.box<!fir.array<?xi32>>
+ ! COARRAY: %[[VAL_20:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]], %[[VAL_20]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images(*)
+
+ ! COARRAY: %[[VAL_23:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_24:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_21:.*]] = fir.embox %[[ME]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! COARRAY: %[[VAL_22:.*]] = fir.rebox %[[VAL_21]](%[[SHAPE_2:.*]]) : (!fir.box<i32>, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
+ ! COARRAY: %[[VAL_25:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (!fir.box<!fir.array<1xi32>>) -> !fir.box<!fir.array<?xi32>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_26]], %[[VAL_23]], %[[VAL_24]], %[[VAL_25]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images(me)
+
+ ! COARRAY: %[[VAL_28:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_29:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_27:.*]] = fir.embox %[[IMG_SET:.*]]#0(%[[SHAPE_3:.*]]) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
+ ! COARRAY: %[[VAL_30:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.array<1xi32>>) -> !fir.box<!fir.array<?xi32>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_31]], %[[VAL_28]], %[[VAL_29]], %[[VAL_30]]) fastmath<contract> : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync images([1])
+
+end program test_sync_images
diff --git a/flang/test/Lower/Coarray/sync_memory.f90 b/flang/test/Lower/Coarray/sync_memory.f90
new file mode 100644
index 0000000..773cb6f
--- /dev/null
+++ b/flang/test/Lower/Coarray/sync_memory.f90
@@ -0,0 +1,37 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY
+! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY
+
+program test_sync_memory
+ implicit none
+ ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable.
+
+ ! COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref<!fir.char<1,128>>, index) -> (!fir.ref<!fir.char<1,128>>, !fir.ref<!fir.char<1,128>>)
+ ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ integer sync_status
+ character(len=128) :: error_message
+
+ ! COARRAY: %[[VAL_3:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_5:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync memory
+
+ ! COARRAY: %[[VAL_6:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_7:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[STAT]]#0, %[[VAL_6]], %[[VAL_7]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync memory(stat=sync_status)
+
+ ! COARRAY: %[[VAL_8:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_9:.*]] = fir.absent !fir.ref<i32>
+ ! COARRAY: %[[VAL_10:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_11:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[VAL_9]], %[[VAL_11]], %[[VAL_10]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync memory( errmsg=error_message)
+
+ ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref<!fir.char<1,128>>) -> !fir.box<!fir.char<1,128>>
+ ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box<!fir.char<1,128>>) -> !fir.box<!fir.char<1,?>>
+ ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[STAT]]#0, %[[VAL_14]], %[[VAL_13]]) fastmath<contract> : (!fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ sync memory(stat=sync_status, errmsg=error_message)
+
+end program test_sync_memory
diff --git a/flang/test/Parser/OpenMP/replayable-clause.f90 b/flang/test/Parser/OpenMP/replayable-clause.f90
new file mode 100644
index 0000000..c173344
--- /dev/null
+++ b/flang/test/Parser/OpenMP/replayable-clause.f90
@@ -0,0 +1,60 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine f00
+ !$omp task replayable
+ block
+ end block
+end
+
+!UNPARSE: SUBROUTINE f00
+!UNPARSE: !$OMP TASK REPLAYABLE
+!UNPARSE: BLOCK
+!UNPARSE: END BLOCK
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Replayable ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+
+
+subroutine f01(x)
+ implicit none
+ integer :: x
+ !$omp target_update to(x) replayable(.true.)
+end
+
+!UNPARSE: SUBROUTINE f01 (x)
+!UNPARSE: IMPLICIT NONE
+!UNPARSE: INTEGER x
+!UNPARSE: !$OMP TARGET_UPDATE TO(x) REPLAYABLE(.true._4)
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target update
+!PARSE-TREE: | OmpClauseList -> OmpClause -> To -> OmpToClause
+!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | bool = 'true'
+!PARSE-TREE: | OmpClause -> Replayable -> OmpReplayableClause -> Scalar -> Logical -> Constant -> Expr = '.true._4'
+!PARSE-TREE: | | LiteralConstant -> LogicalLiteralConstant
+!PARSE-TREE: | | | bool = 'true'
+!PARSE-TREE: | Flags = None
+
+
+subroutine f02
+ !$omp taskwait replayable(.false.)
+end
+
+!UNPARSE: SUBROUTINE f02
+!UNPARSE: !$OMP TASKWAIT REPLAYABLE(.false._4)
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = taskwait
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Replayable -> OmpReplayableClause -> Scalar -> Logical -> Constant -> Expr = '.false._4'
+!PARSE-TREE: | | LiteralConstant -> LogicalLiteralConstant
+!PARSE-TREE: | | | bool = 'false'
+!PARSE-TREE: | Flags = None
diff --git a/flang/test/Parser/OpenMP/transparent-clause.f90 b/flang/test/Parser/OpenMP/transparent-clause.f90
new file mode 100644
index 0000000..01f49f5
--- /dev/null
+++ b/flang/test/Parser/OpenMP/transparent-clause.f90
@@ -0,0 +1,76 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine f00(x)
+ implicit none
+ integer :: x
+ !$omp target_data map(to: x) transparent
+ block
+ end block
+end
+
+!UNPARSE: SUBROUTINE f00 (x)
+!UNPARSE: IMPLICIT NONE
+!UNPARSE: INTEGER x
+!UNPARSE: !$OMP TARGET_DATA MAP(TO: x) TRANSPARENT
+!UNPARSE: BLOCK
+!UNPARSE: END BLOCK
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target data
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
+!PARSE-TREE: | | | Modifier -> OmpMapType -> Value = To
+!PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | bool = 'true'
+!PARSE-TREE: | | OmpClause -> Transparent ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+
+
+subroutine f01
+ !$omp task transparent(0)
+ !$omp end task
+end
+
+!UNPARSE: SUBROUTINE f01
+!UNPARSE: !$OMP TASK TRANSPARENT(0_4)
+!UNPARSE: !$OMP END TASK
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Transparent -> OmpTransparentClause -> Scalar -> Integer -> Expr = '0_4'
+!PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+
+
+subroutine f02
+ implicit none
+ integer :: i
+ !$omp taskloop transparent(2)
+ do i = 1, 10
+ end do
+end
+
+!UNPARSE: SUBROUTINE f02
+!UNPARSE: IMPLICIT NONE
+!UNPARSE: INTEGER i
+!UNPARSE: !$OMP TASKLOOP TRANSPARENT(2_4)
+!UNPARSE: DO i=1_4,10_4
+!UNPARSE: END DO
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE: | OmpBeginLoopDirective
+!PARSE-TREE: | | OmpLoopDirective -> llvm::omp::Directive = taskloop
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Transparent -> OmpTransparentClause -> Scalar -> Integer -> Expr = '2_4'
+!PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '2'
+!PARSE-TREE: | DoConstruct
diff --git a/flang/test/Semantics/OpenMP/replayable-clause.f90 b/flang/test/Semantics/OpenMP/replayable-clause.f90
new file mode 100644
index 0000000..b8fe6ce
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/replayable-clause.f90
@@ -0,0 +1,22 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine f00(x)
+ implicit none
+ logical :: x
+ !ERROR: Must be a constant value
+ !$omp task replayable(x)
+ !$omp end task
+end
+
+subroutine f01
+ !ERROR: Must have LOGICAL type, but is INTEGER(4)
+ !$omp task replayable(7)
+ !$omp end task
+end
+
+subroutine f02
+ !No diagnostic expected
+ !$omp task replayable
+ !$omp end task
+end
+
diff --git a/flang/test/Semantics/OpenMP/transparent-clause.f90 b/flang/test/Semantics/OpenMP/transparent-clause.f90
new file mode 100644
index 0000000..4831ba0
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/transparent-clause.f90
@@ -0,0 +1,19 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine f00(x)
+ integer :: x(10)
+ !ERROR: Must be a scalar value, but is a rank-1 array
+ !$omp task transparent(x)
+ !$omp end task
+end
+
+subroutine f01
+ implicit none
+ integer :: i
+ !ERROR: Must have INTEGER type, but is CHARACTER(KIND=1,LEN=5_8)
+ !$omp taskloop transparent("hello")
+ do i = 1, 10
+ end do
+ !$omp end taskloop
+end
+
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index b6e87ac..0ef09a9 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -302,6 +302,7 @@ add_header_library(
DEPENDS
libc.hdr.stdint_proxy
libc.src.__support.common
+ libc.src.string.memory_utils.inline_memcpy
)
add_header_library(
diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h
index 1e26a5e..7b78a9c 100644
--- a/libc/src/__support/arg_list.h
+++ b/libc/src/__support/arg_list.h
@@ -12,6 +12,7 @@
#include "hdr/stdint_proxy.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
#include <stdarg.h>
#include <stddef.h>
@@ -126,7 +127,7 @@ public:
// Memcpy because pointer alignment may be illegal given a packed struct.
T val;
- __builtin_memcpy(&val, ptr, sizeof(T));
+ inline_memcpy(&val, ptr, sizeof(T));
ptr =
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(ptr) + sizeof(T));
diff --git a/libc/src/__support/endian_internal.h b/libc/src/__support/endian_internal.h
index 4ac8709..07cde7b 100644
--- a/libc/src/__support/endian_internal.h
+++ b/libc/src/__support/endian_internal.h
@@ -35,7 +35,7 @@ template <> LIBC_INLINE uint16_t byte_swap<uint16_t>(uint16_t value) {
#if __has_builtin(__builtin_bswap16)
return __builtin_bswap16(value);
#else
- return (v << 8) | (v >> 8);
+ return (value << 8) | (value >> 8);
#endif // __builtin_bswap16
}
@@ -43,8 +43,9 @@ template <> LIBC_INLINE uint32_t byte_swap<uint32_t>(uint32_t value) {
#if __has_builtin(__builtin_bswap32)
return __builtin_bswap32(value);
#else
- return byte_swap<uint16_t>(static_cast<uint16>(v >> 16)) ||
- (static_cast<uint32_t>(byte_swap<uint16_t>(static_cast<uint16_t>(v)))
+ return byte_swap<uint16_t>(static_cast<uint16_t>(value >> 16)) |
+ (static_cast<uint32_t>(
+ byte_swap<uint16_t>(static_cast<uint16_t>(value)))
<< 16);
#endif // __builtin_bswap64
}
@@ -53,8 +54,9 @@ template <> LIBC_INLINE uint64_t byte_swap<uint64_t>(uint64_t value) {
#if __has_builtin(__builtin_bswap64)
return __builtin_bswap64(value);
#else
- return byte_swap<uint32_t>(static_cast<uint32>(v >> 32)) ||
- (static_cast<uint64_t>(byte_swap<uint32_t>(static_cast<uint32_t>(v)))
+ return byte_swap<uint32_t>(static_cast<uint32_t>(value >> 32)) |
+ (static_cast<uint64_t>(
+ byte_swap<uint32_t>(static_cast<uint32_t>(value)))
<< 32);
#endif // __builtin_bswap64
}
diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h
index 501a816..b06a890 100644
--- a/libc/src/__support/macros/config.h
+++ b/libc/src/__support/macros/config.h
@@ -46,6 +46,8 @@
#define __builtin_expect(value, expectation) (value)
#define __builtin_unreachable() __assume(0)
+#define __builtin_prefetch(X, Y, Z)
+
#endif // LIBC_COMPILER_IS_MSVC
#ifdef __clang__
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 76eb0a2..ee66145 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -112,6 +112,7 @@ add_header_library(
libc.src.__support.libc_assert
libc.src.__support.uint128
libc.src.__support.StringUtil.error_to_string
+ libc.src.string.memory_utils.inline_memcpy
)
add_header_library(
diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h
index f468dbc..9cdc135 100644
--- a/libc/src/stdio/printf_core/float_dec_converter_limited.h
+++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h
@@ -53,6 +53,7 @@
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/float_inf_nan_converter.h"
#include "src/stdio/printf_core/writer.h"
+#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
namespace printf_core {
@@ -250,7 +251,7 @@ DigitsOutput decimal_digits(DigitsInput input, int precision, bool e_mode) {
// there's space for it in the DigitsOutput buffer).
DigitsOutput output;
output.ndigits = view.size();
- __builtin_memcpy(output.digits, view.data(), output.ndigits);
+ inline_memcpy(output.digits, view.data(), output.ndigits);
// Set up the output exponent, which is done differently depending on mode.
// Also, figure out whether we have one digit too many, and if so, set the
@@ -551,7 +552,7 @@ convert_float_inner(Writer<write_mode> *writer, const FormatSection &to_conv,
cpp::string_view expview = expcvt.view();
expbuf[0] = internal::islower(to_conv.conv_name) ? 'e' : 'E';
explen = expview.size() + 1;
- __builtin_memcpy(expbuf + 1, expview.data(), expview.size());
+ inline_memcpy(expbuf + 1, expview.data(), expview.size());
}
// Now we know enough to work out the length of the unpadded output:
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index aa653c3..c464f82 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -292,6 +292,7 @@ add_header_library(
libc.hdr.stdint_proxy
libc.include.stdlib
libc.src.__support.CPP.cstddef
+ libc.src.string.memory_utils.inline_memcpy
)
add_entrypoint_object(
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index 739fce8..4f97740 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -12,6 +12,7 @@
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
@@ -54,9 +55,9 @@ public:
const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
while (elem_i != elem_i_block_end) {
- __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
- __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
- __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
+ inline_memcpy(tmp_block, elem_i, BLOCK_SIZE);
+ inline_memcpy(elem_i, elem_j, BLOCK_SIZE);
+ inline_memcpy(elem_j, tmp_block, BLOCK_SIZE);
elem_i += BLOCK_SIZE;
elem_j += BLOCK_SIZE;
@@ -112,9 +113,9 @@ public:
cpp::byte *elem_i = get_internal(i);
cpp::byte *elem_j = get_internal(j);
- __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
+ inline_memcpy(tmp, elem_i, ELEM_SIZE);
__builtin_memmove(elem_i, elem_j, ELEM_SIZE);
- __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
+ inline_memcpy(elem_j, tmp, ELEM_SIZE);
}
LIBC_INLINE size_t len() const { return array_len; }
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 5c9f622..b8cdb2a7 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -22,6 +22,7 @@ add_header_library(
libc.src.__support.CPP.type_traits
libc.src.__support.CPP.simd
libc.src.__support.common
+ libc.src.string.memory_utils.inline_memcpy
${string_config_options}
)
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 670db30..9cabfb9 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -42,6 +42,7 @@ add_header_library(
libc.src.__support.macros.config
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.compiler
)
add_header_library(
diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index 3760341..010f218 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -31,6 +31,7 @@
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/compiler.h"
#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"
@@ -39,12 +40,22 @@ static_assert((UINTPTR_MAX == 4294967295U) ||
(UINTPTR_MAX == 18446744073709551615UL),
"We currently only support 32- or 64-bit platforms");
+#ifdef LIBC_COMPILER_IS_MSVC
+
+namespace LIBC_NAMESPACE_DECL {
+using generic_v128 = __m128i;
+using generic_v256 = __m256i;
+using generic_v512 = __m512i;
+} // namespace LIBC_NAMESPACE_DECL
+
+#else
namespace LIBC_NAMESPACE_DECL {
// Compiler types using the vector attributes.
using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_COMPILER_IS_MSVC
namespace LIBC_NAMESPACE_DECL {
namespace generic {
diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h
index 8bd8412..1b40527 100644
--- a/libc/src/string/memory_utils/op_x86.h
+++ b/libc/src/string/memory_utils/op_x86.h
@@ -15,6 +15,7 @@
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/compiler.h"
#if defined(LIBC_TARGET_ARCH_IS_X86)
@@ -57,7 +58,12 @@ LIBC_INLINE_VAR constexpr bool K_AVX512_BW = LLVM_LIBC_IS_DEFINED(__AVX512BW__);
// Memcpy repmovsb implementation
struct Memcpy {
LIBC_INLINE static void repmovsb(void *dst, const void *src, size_t count) {
+#ifdef LIBC_COMPILER_IS_MSVC
+ __movsb(static_cast<unsigned char *>(dst),
+ static_cast<const unsigned char *>(src), count);
+#else
asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
+#endif // LIBC_COMPILER_IS_MSVC
}
};
@@ -138,8 +144,10 @@ LIBC_INLINE MemcmpReturnType cmp_neq<uint64_t>(CPtr p1, CPtr p2,
// When we use these SIMD types in template specialization GCC complains:
// "ignoring attributes on template argument ‘__m128i’ [-Wignored-attributes]"
// Therefore, we disable this warning in this file.
+#ifndef LIBC_COMPILER_IS_MSVC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif // !LIBC_COMPILER_IS_MSVC
///////////////////////////////////////////////////////////////////////////////
// Specializations for __m128i
@@ -366,7 +374,9 @@ LIBC_INLINE MemcmpReturnType cmp_neq<__m512i>(CPtr p1, CPtr p2, size_t offset) {
}
#endif // __AVX512BW__
+#ifndef LIBC_COMPILER_IS_MSVC
#pragma GCC diagnostic pop
+#endif // !LIBC_COMPILER_IS_MSVC
} // namespace generic
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index 0f9c9e3..86ff4f1 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -17,6 +17,7 @@
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/compiler.h"
#include <stddef.h> // size_t
@@ -90,13 +91,17 @@ LIBC_INLINE void memcpy_inline(void *__restrict dst,
// different value of the Size parameter. This doesn't play well with GCC's
// Value Range Analysis that wrongly detects out of bounds accesses. We
// disable these warnings for the purpose of this function.
+#ifndef LIBC_COMPILER_IS_MSVC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif // !LIBC_COMPILER_IS_MSVC
for (size_t i = 0; i < Size; ++i)
static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
+#ifndef LIBC_COMPILER_IS_MSVC
#pragma GCC diagnostic pop
+#endif // !LIBC_COMPILER_IS_MSVC
#endif
}
diff --git a/libc/src/string/stpcpy.cpp b/libc/src/string/stpcpy.cpp
index 48c0db9..fefae81 100644
--- a/libc/src/string/stpcpy.cpp
+++ b/libc/src/string/stpcpy.cpp
@@ -8,6 +8,7 @@
#include "src/string/stpcpy.h"
#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
#include "src/string/string_utils.h"
#include "src/__support/common.h"
@@ -17,7 +18,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(char *, stpcpy,
(char *__restrict dest, const char *__restrict src)) {
size_t size = internal::string_length(src) + 1;
- __builtin_memcpy(dest, src, size);
+ inline_memcpy(dest, src, size);
char *result = dest + size;
if (result != nullptr)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 1080348..9d636d0 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -21,6 +21,7 @@
#include "src/__support/CPP/type_traits.h" // cpp::is_same_v
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/string/memory_utils/inline_memcpy.h"
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
#if LIBC_HAS_VECTOR_TYPE
@@ -242,7 +243,7 @@ LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
if (!size)
return len;
size_t n = len < size - 1 ? len : size - 1;
- __builtin_memcpy(dst, src, n);
+ inline_memcpy(dst, src, n);
dst[n] = '\0';
return len;
}
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 9ba0a06..adde382 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -452,6 +452,7 @@ add_entrypoint_object(
DEPENDS
libc.hdr.types.size_t
libc.hdr.wchar_macros
+ libc.src.string.memory_utils.inline_memcpy
)
add_entrypoint_object(
diff --git a/libc/src/wchar/wcpcpy.cpp b/libc/src/wchar/wcpcpy.cpp
index 9e2b12f..b6d80d4 100644
--- a/libc/src/wchar/wcpcpy.cpp
+++ b/libc/src/wchar/wcpcpy.cpp
@@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wcpcpy,
(wchar_t *__restrict s1, const wchar_t *__restrict s2)) {
size_t size = internal::string_length(s2);
- __builtin_memcpy(s1, s2, (size + 1) * sizeof(wchar_t));
+ inline_memcpy(s1, s2, (size + 1) * sizeof(wchar_t));
wchar_t *result = s1 + size;
return result;
}
diff --git a/libc/src/wchar/wcscpy.cpp b/libc/src/wchar/wcscpy.cpp
index 01ba994..703706e 100644
--- a/libc/src/wchar/wcscpy.cpp
+++ b/libc/src/wchar/wcscpy.cpp
@@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wcscpy,
(wchar_t *__restrict s1, const wchar_t *__restrict s2)) {
size_t size = internal::string_length(s2) + 1;
- __builtin_memcpy(s1, s2, size * sizeof(wchar_t));
+ inline_memcpy(s1, s2, size * sizeof(wchar_t));
return s1;
}
diff --git a/libc/src/wchar/wmemcpy.cpp b/libc/src/wchar/wmemcpy.cpp
index bf92309..56708d6c 100644
--- a/libc/src/wchar/wmemcpy.cpp
+++ b/libc/src/wchar/wmemcpy.cpp
@@ -12,13 +12,14 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wmemcpy,
(wchar_t *__restrict s1, const wchar_t *__restrict s2,
size_t n)) {
- __builtin_memcpy(s1, s2, n * sizeof(wchar_t));
+ inline_memcpy(s1, s2, n * sizeof(wchar_t));
return s1;
}
diff --git a/libc/src/wchar/wmempcpy.cpp b/libc/src/wchar/wmempcpy.cpp
index 21e1621..d8b89c0 100644
--- a/libc/src/wchar/wmempcpy.cpp
+++ b/libc/src/wchar/wmempcpy.cpp
@@ -11,13 +11,14 @@
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
+#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wmempcpy,
(wchar_t *__restrict to, const wchar_t *__restrict from,
size_t size)) {
- __builtin_memcpy(to, from, size * sizeof(wchar_t));
+ inline_memcpy(to, from, size * sizeof(wchar_t));
return reinterpret_cast<wchar_t *>(to) + size;
}
diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt
index f1a83fc..31d1e9d 100644
--- a/libc/test/UnitTest/CMakeLists.txt
+++ b/libc/test/UnitTest/CMakeLists.txt
@@ -76,6 +76,7 @@ add_unittest_framework_library(
libc.src.__support.CPP.string_view
libc.src.__support.CPP.type_traits
libc.src.__support.fixed_point.fx_rep
+ libc.src.__support.macros.properties.compiler
libc.src.__support.macros.properties.types
libc.src.__support.OSUtil.osutil
libc.src.__support.uint128
diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h
index fbeafd0..cf098cd 100644
--- a/libc/test/UnitTest/LibcTest.h
+++ b/libc/test/UnitTest/LibcTest.h
@@ -30,6 +30,7 @@
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/c_string.h"
+#include "src/__support/macros/properties/compiler.h"
#include "test/UnitTest/ExecuteFunction.h"
#include "test/UnitTest/TestLogger.h"
@@ -260,7 +261,11 @@ constexpr char const *GetPrettyFunctionParamType(char const *str) {
// This function recovers ParamType at compile time by using __PRETTY_FUNCTION__
// It can be customized by using the REGISTER_TYPE_NAME macro below.
template <typename ParamType> static constexpr const char *GetTypeName() {
+#ifdef LIBC_COMPILER_IS_MSVC
+ return GetPrettyFunctionParamType(__FUNCSIG__);
+#else
return GetPrettyFunctionParamType(__PRETTY_FUNCTION__);
+#endif // LIBC_COMPILER_IS_MSVC
}
template <typename T>
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index c75f450..7960f34 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -104,6 +104,7 @@ else()
# Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR.
set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc )
+ file( MAKE_DIRECTORY ${LIBCLC_OUTPUT_LIBRARY_DIR} )
endif()
if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
diff --git a/libclc/utils/CMakeLists.txt b/libclc/utils/CMakeLists.txt
index 6851ae1..a14d133 100644
--- a/libclc/utils/CMakeLists.txt
+++ b/libclc/utils/CMakeLists.txt
@@ -1,6 +1,3 @@
-# Construct LLVM version define
-set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}" )
-
# Setup prepare_builtins tools
set( LLVM_LINK_COMPONENTS
BitReader
@@ -19,6 +16,5 @@ else()
setup_host_tool( prepare_builtins PREPARE_BUILTINS prepare_builtins_exe prepare_builtins_target )
endif()
-target_compile_definitions( prepare_builtins PRIVATE ${LLVM_VERSION_DEFINE} )
# These were not properly reported in early LLVM and we don't need them
target_compile_options( prepare_builtins PRIVATE -fno-rtti -fno-exceptions )
diff --git a/libclc/utils/prepare-builtins.cpp b/libclc/utils/prepare-builtins.cpp
index b10dfcc..40a5445 100644
--- a/libclc/utils/prepare-builtins.cpp
+++ b/libclc/utils/prepare-builtins.cpp
@@ -6,12 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#if HAVE_LLVM > 0x0390
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
-#else
-#include "llvm/Bitcode/ReaderWriter.h"
-#endif
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
@@ -62,12 +58,8 @@ int main(int argc, char **argv) {
std::unique_ptr<MemoryBuffer> &BufferPtr = BufferOrErr.get();
SMDiagnostic Err;
std::unique_ptr<llvm::Module> MPtr =
-#if HAVE_LLVM > 0x0390
ExitOnErr(Expected<std::unique_ptr<llvm::Module>>(
parseIR(BufferPtr.get()->getMemBufferRef(), Err, Context)));
-#else
- parseIR(BufferPtr.get()->getMemBufferRef(), Err, Context);
-#endif
M = MPtr.release();
}
}
@@ -106,13 +98,8 @@ int main(int argc, char **argv) {
}
std::error_code EC;
-#if HAVE_LLVM >= 0x0600
std::unique_ptr<ToolOutputFile> Out(
new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None));
-#else
- std::unique_ptr<tool_output_file> Out(
- new tool_output_file(OutputFilename, EC, sys::fs::OF_None));
-#endif
if (EC) {
errs() << EC.message() << '\n';
exit(1);
@@ -121,11 +108,7 @@ int main(int argc, char **argv) {
if (TextualOut)
M->print(Out->os(), nullptr, true);
else
-#if HAVE_LLVM >= 0x0700
WriteBitcodeToFile(*M, Out->os());
-#else
- WriteBitcodeToFile(M, Out->os());
-#endif
// Declare success.
Out->keep();
diff --git a/libcxx/include/__cxx03/fstream b/libcxx/include/__cxx03/fstream
index 44bdabc..65c2c3e 100644
--- a/libcxx/include/__cxx03/fstream
+++ b/libcxx/include/__cxx03/fstream
@@ -191,6 +191,7 @@ typedef basic_fstream<wchar_t> wfstream;
#include <__cxx03/__config>
#include <__cxx03/__fwd/fstream.h>
#include <__cxx03/__locale>
+#include <__cxx03/__memory/addressof.h>
#include <__cxx03/__type_traits/enable_if.h>
#include <__cxx03/__type_traits/is_same.h>
#include <__cxx03/__utility/move.h>
@@ -1062,11 +1063,12 @@ private:
};
template <class _CharT, class _Traits>
-inline basic_ifstream<_CharT, _Traits>::basic_ifstream() : basic_istream<char_type, traits_type>(&__sb_) {}
+inline basic_ifstream<_CharT, _Traits>::basic_ifstream()
+ : basic_istream<char_type, traits_type>(std::addressof(__sb_)) {}
template <class _CharT, class _Traits>
inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const char* __s, ios_base::openmode __mode)
- : basic_istream<char_type, traits_type>(&__sb_) {
+ : basic_istream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::in) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1074,15 +1076,16 @@ inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const char* __s, ios_base
# ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
template <class _CharT, class _Traits>
inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const wchar_t* __s, ios_base::openmode __mode)
- : basic_istream<char_type, traits_type>(&__sb_) {
+ : basic_istream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::in) == nullptr)
this->setstate(ios_base::failbit);
}
# endif
+// extension
template <class _CharT, class _Traits>
inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_base::openmode __mode)
- : basic_istream<char_type, traits_type>(&__sb_) {
+ : basic_istream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::in) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1090,7 +1093,7 @@ inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_ba
template <class _CharT, class _Traits>
inline basic_ifstream<_CharT, _Traits>::basic_ifstream(basic_ifstream&& __rhs)
: basic_istream<char_type, traits_type>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- this->set_rdbuf(&__sb_);
+ this->set_rdbuf(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
@@ -1113,7 +1116,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_ifstream<_CharT, _Traits>& __x, bas
template <class _CharT, class _Traits>
inline basic_filebuf<_CharT, _Traits>* basic_ifstream<_CharT, _Traits>::rdbuf() const {
- return const_cast<basic_filebuf<char_type, traits_type>*>(&__sb_);
+ return const_cast<basic_filebuf<char_type, traits_type>*>(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
@@ -1199,11 +1202,12 @@ private:
};
template <class _CharT, class _Traits>
-inline basic_ofstream<_CharT, _Traits>::basic_ofstream() : basic_ostream<char_type, traits_type>(&__sb_) {}
+inline basic_ofstream<_CharT, _Traits>::basic_ofstream()
+ : basic_ostream<char_type, traits_type>(std::addressof(__sb_)) {}
template <class _CharT, class _Traits>
inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const char* __s, ios_base::openmode __mode)
- : basic_ostream<char_type, traits_type>(&__sb_) {
+ : basic_ostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::out) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1211,15 +1215,16 @@ inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const char* __s, ios_base
# ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
template <class _CharT, class _Traits>
inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const wchar_t* __s, ios_base::openmode __mode)
- : basic_ostream<char_type, traits_type>(&__sb_) {
+ : basic_ostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::out) == nullptr)
this->setstate(ios_base::failbit);
}
# endif
+// extension
template <class _CharT, class _Traits>
inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_base::openmode __mode)
- : basic_ostream<char_type, traits_type>(&__sb_) {
+ : basic_ostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode | ios_base::out) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1227,7 +1232,7 @@ inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_ba
template <class _CharT, class _Traits>
inline basic_ofstream<_CharT, _Traits>::basic_ofstream(basic_ofstream&& __rhs)
: basic_ostream<char_type, traits_type>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- this->set_rdbuf(&__sb_);
+ this->set_rdbuf(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
@@ -1250,7 +1255,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_ofstream<_CharT, _Traits>& __x, bas
template <class _CharT, class _Traits>
inline basic_filebuf<_CharT, _Traits>* basic_ofstream<_CharT, _Traits>::rdbuf() const {
- return const_cast<basic_filebuf<char_type, traits_type>*>(&__sb_);
+ return const_cast<basic_filebuf<char_type, traits_type>*>(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
@@ -1340,11 +1345,12 @@ private:
};
template <class _CharT, class _Traits>
-inline basic_fstream<_CharT, _Traits>::basic_fstream() : basic_iostream<char_type, traits_type>(&__sb_) {}
+inline basic_fstream<_CharT, _Traits>::basic_fstream()
+ : basic_iostream<char_type, traits_type>(std::addressof(__sb_)) {}
template <class _CharT, class _Traits>
inline basic_fstream<_CharT, _Traits>::basic_fstream(const char* __s, ios_base::openmode __mode)
- : basic_iostream<char_type, traits_type>(&__sb_) {
+ : basic_iostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1352,7 +1358,7 @@ inline basic_fstream<_CharT, _Traits>::basic_fstream(const char* __s, ios_base::
# ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
template <class _CharT, class _Traits>
inline basic_fstream<_CharT, _Traits>::basic_fstream(const wchar_t* __s, ios_base::openmode __mode)
- : basic_iostream<char_type, traits_type>(&__sb_) {
+ : basic_iostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode) == nullptr)
this->setstate(ios_base::failbit);
}
@@ -1360,15 +1366,16 @@ inline basic_fstream<_CharT, _Traits>::basic_fstream(const wchar_t* __s, ios_bas
template <class _CharT, class _Traits>
inline basic_fstream<_CharT, _Traits>::basic_fstream(const string& __s, ios_base::openmode __mode)
- : basic_iostream<char_type, traits_type>(&__sb_) {
+ : basic_iostream<char_type, traits_type>(std::addressof(__sb_)) {
if (__sb_.open(__s, __mode) == nullptr)
this->setstate(ios_base::failbit);
}
+// extension
template <class _CharT, class _Traits>
inline basic_fstream<_CharT, _Traits>::basic_fstream(basic_fstream&& __rhs)
: basic_iostream<char_type, traits_type>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- this->set_rdbuf(&__sb_);
+ this->set_rdbuf(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
@@ -1391,7 +1398,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_fstream<_CharT, _Traits>& __x, basi
template <class _CharT, class _Traits>
inline basic_filebuf<_CharT, _Traits>* basic_fstream<_CharT, _Traits>::rdbuf() const {
- return const_cast<basic_filebuf<char_type, traits_type>*>(&__sb_);
+ return const_cast<basic_filebuf<char_type, traits_type>*>(std::addressof(__sb_));
}
template <class _CharT, class _Traits>
diff --git a/libcxx/include/__cxx03/ios b/libcxx/include/__cxx03/ios
index 7c52290..aa03ce3 100644
--- a/libcxx/include/__cxx03/ios
+++ b/libcxx/include/__cxx03/ios
@@ -218,6 +218,7 @@ storage-class-specifier const error_category& iostream_category() noexcept;
# include <__cxx03/__fwd/ios.h>
# include <__cxx03/__ios/fpos.h>
# include <__cxx03/__locale>
+# include <__cxx03/__memory/addressof.h>
# include <__cxx03/__system_error/error_category.h>
# include <__cxx03/__system_error/error_code.h>
# include <__cxx03/__system_error/error_condition.h>
@@ -696,7 +697,7 @@ inline _LIBCPP_HIDE_FROM_ABI _CharT basic_ios<_CharT, _Traits>::fill(char_type _
template <class _CharT, class _Traits>
basic_ios<_CharT, _Traits>& basic_ios<_CharT, _Traits>::copyfmt(const basic_ios& __rhs) {
- if (this != &__rhs) {
+ if (this != std::addressof(__rhs)) {
__call_callbacks(erase_event);
ios_base::copyfmt(__rhs);
__tie_ = __rhs.__tie_;
diff --git a/libcxx/include/__cxx03/sstream b/libcxx/include/__cxx03/sstream
index de56cd9..44c2423 100644
--- a/libcxx/include/__cxx03/sstream
+++ b/libcxx/include/__cxx03/sstream
@@ -713,18 +713,19 @@ private:
public:
// [istringstream.cons] Constructors:
- _LIBCPP_HIDE_FROM_ABI basic_istringstream() : basic_istream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in) {}
+ _LIBCPP_HIDE_FROM_ABI basic_istringstream()
+ : basic_istream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::in) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_istringstream(ios_base::openmode __wch)
- : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::in) {}
+ : basic_istream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__wch | ios_base::in) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_istringstream(const string_type& __s, ios_base::openmode __wch = ios_base::in)
- : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch | ios_base::in) {}
+ : basic_istream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch | ios_base::in) {}
basic_istringstream(const basic_istringstream&) = delete;
_LIBCPP_HIDE_FROM_ABI basic_istringstream(basic_istringstream&& __rhs)
: basic_istream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_);
+ basic_istream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_));
}
// [istringstream.assign] Assign and swap:
@@ -741,7 +742,7 @@ public:
// [istringstream.members] Member functions:
_LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
- return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(&__sb_);
+ return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
}
_LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
@@ -774,18 +775,19 @@ private:
public:
// [ostringstream.cons] Constructors:
- _LIBCPP_HIDE_FROM_ABI basic_ostringstream() : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::out) {}
+ _LIBCPP_HIDE_FROM_ABI basic_ostringstream()
+ : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::out) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_ostringstream(ios_base::openmode __wch)
- : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::out) {}
+ : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__wch | ios_base::out) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_ostringstream(const string_type& __s, ios_base::openmode __wch = ios_base::out)
- : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch | ios_base::out) {}
+ : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch | ios_base::out) {}
basic_ostringstream(const basic_ostringstream&) = delete;
_LIBCPP_HIDE_FROM_ABI basic_ostringstream(basic_ostringstream&& __rhs)
: basic_ostream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_);
+ basic_ostream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_));
}
// [ostringstream.assign] Assign and swap:
@@ -803,7 +805,7 @@ public:
// [ostringstream.members] Member functions:
_LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
- return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(&__sb_);
+ return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
}
_LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
@@ -836,19 +838,19 @@ private:
public:
// [stringstream.cons] constructors
_LIBCPP_HIDE_FROM_ABI basic_stringstream()
- : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in | ios_base::out) {}
+ : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::in | ios_base::out) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_stringstream(ios_base::openmode __wch)
- : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__wch) {}
+ : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__wch) {}
_LIBCPP_HIDE_FROM_ABI explicit basic_stringstream(const string_type& __s,
ios_base::openmode __wch = ios_base::in | ios_base::out)
- : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch) {}
+ : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch) {}
basic_stringstream(const basic_stringstream&) = delete;
_LIBCPP_HIDE_FROM_ABI basic_stringstream(basic_stringstream&& __rhs)
: basic_iostream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) {
- basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_);
+ basic_istream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_));
}
// [stringstream.assign] Assign and swap:
@@ -865,7 +867,7 @@ public:
// [stringstream.members] Member functions:
_LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
- return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(&__sb_);
+ return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
}
_LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
diff --git a/libcxx/include/__cxx03/string b/libcxx/include/__cxx03/string
index 1781404..6c43fca 100644
--- a/libcxx/include/__cxx03/string
+++ b/libcxx/include/__cxx03/string
@@ -2866,13 +2866,13 @@ inline void basic_string<_CharT, _Traits, _Allocator>::swap(basic_string& __str)
"swapping non-equal allocators");
if (!__is_long())
__annotate_delete();
- if (this != &__str && !__str.__is_long())
+ if (this != std::addressof(__str) && !__str.__is_long())
__str.__annotate_delete();
std::swap(__r_.first(), __str.__r_.first());
std::__swap_allocator(__alloc(), __str.__alloc());
if (!__is_long())
__annotate_new(__get_short_size());
- if (this != &__str && !__str.__is_long())
+ if (this != std::addressof(__str) && !__str.__is_long())
__str.__annotate_new(__str.__get_short_size());
}
diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer
index 21e58f4..15368a3 100644
--- a/libcxx/include/__split_buffer
+++ b/libcxx/include/__split_buffer
@@ -13,10 +13,12 @@
#include <__algorithm/max.h>
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
+#include <__assert>
#include <__config>
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/move_iterator.h>
+#include <__memory/addressof.h>
#include <__memory/allocate_at_least.h>
#include <__memory/allocator.h>
#include <__memory/allocator_traits.h>
@@ -45,25 +47,434 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-// __split_buffer allocates a contiguous chunk of memory and stores objects in the range [__begin_, __end_).
-// It has uninitialized memory in the ranges [__first_, __begin_) and [__end_, __cap_). That allows
-// it to grow both in the front and back without having to move the data.
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+class __split_buffer;
+
+template <class _SplitBuffer, class _Tp, class _Allocator>
+class __split_buffer_pointer_layout {
+protected:
+ using value_type = _Tp;
+ using allocator_type = _Allocator;
+ using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t<allocator_type>;
+ using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>;
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using size_type = typename __alloc_traits::size_type;
+ using difference_type = typename __alloc_traits::difference_type;
+ using pointer = typename __alloc_traits::pointer;
+ using const_pointer = typename __alloc_traits::const_pointer;
+ using iterator = pointer;
+ using const_iterator = const_pointer;
+ using __sentinel_type _LIBCPP_NODEBUG = pointer;
-template <class _Tp, class _Allocator = allocator<_Tp> >
-struct __split_buffer {
public:
- using value_type = _Tp;
- using allocator_type = _Allocator;
- using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t<allocator_type>;
- using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>;
- using reference = value_type&;
- using const_reference = const value_type&;
- using size_type = typename __alloc_traits::size_type;
- using difference_type = typename __alloc_traits::difference_type;
- using pointer = typename __alloc_traits::pointer;
- using const_pointer = typename __alloc_traits::const_pointer;
- using iterator = pointer;
- using const_iterator = const_pointer;
+ // Can't be defaulted due to _LIBCPP_COMPRESSED_PAIR not being an aggregate in C++03 and C++11.
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_pointer_layout() : __back_cap_(nullptr) {}
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20
+ _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_pointer_layout(const allocator_type& __alloc)
+ : __back_cap_(nullptr), __alloc_(__alloc) {}
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT {
+ return __front_cap_;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __end_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __end_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT {
+ return static_cast<size_type>(__end_ - __begin_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __begin_ == __end_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT {
+ return static_cast<size_type>(__back_cap_ - __front_cap_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT {
+ return __alloc_;
+ }
+
+ // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction,
+ // not explicit types.
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT {
+ return __end_;
+ }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT {
+ return __back_cap_;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT {
+ __front_cap_ = __new_first;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT {
+ __begin_ = __new_begin;
+ __end_ = __new_end;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT {
+ __begin_ = __new_begin;
+ __end_ = __begin_ + __new_size;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT {
+ _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_");
+ __end_ = __new_end;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT {
+ __end_ = __begin_ + __new_size;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT {
+ __back_cap_ = __front_cap_ + __new_capacity;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT {
+ __back_cap_ = __new_capacity;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT {
+ return static_cast<size_type>(__begin_ - __front_cap_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT {
+ return static_cast<size_type>(__back_cap_ - __end_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return *(__end_ - 1); }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return *(__end_ - 1); }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator(
+ __split_buffer_pointer_layout<__split_buffer<value_type, __alloc_rr&, __split_buffer_pointer_layout>,
+ value_type,
+ __alloc_rr&>& __other) _NOEXCEPT {
+ std::swap(__front_cap_, __other.__front_cap_);
+ std::swap(__begin_, __other.__begin_);
+ std::swap(__back_cap_, __other.__back_cap_);
+ std::swap(__end_, __other.__end_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_pointer_layout& __other) _NOEXCEPT {
+ std::swap(__front_cap_, __other.__front_cap_);
+ std::swap(__begin_, __other.__begin_);
+ std::swap(__back_cap_, __other.__back_cap_);
+ std::swap(__end_, __other.__end_);
+ std::__swap_allocator(__alloc_, __other.__alloc_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT {
+ __front_cap_ = nullptr;
+ __begin_ = nullptr;
+ __end_ = nullptr;
+ __back_cap_ = nullptr;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __copy_without_alloc(__split_buffer_pointer_layout const& __other)
+ _NOEXCEPT_(is_nothrow_copy_assignable<pointer>::value) {
+ __front_cap_ = __other.__front_cap_;
+ __begin_ = __other.__begin_;
+ __end_ = __other.__end_;
+ __back_cap_ = __other.__back_cap_;
+ }
+
+private:
+ pointer __front_cap_ = nullptr;
+ pointer __begin_ = nullptr;
+ pointer __end_ = nullptr;
+ _LIBCPP_COMPRESSED_PAIR(pointer, __back_cap_, allocator_type, __alloc_);
+
+ template <class, class, class>
+ friend class __split_buffer_pointer_layout;
+};
+
+template <class _SplitBuffer, class _Tp, class _Allocator>
+class __split_buffer_size_layout {
+protected:
+ using value_type = _Tp;
+ using allocator_type = _Allocator;
+ using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t<allocator_type>;
+ using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>;
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using size_type = typename __alloc_traits::size_type;
+ using difference_type = typename __alloc_traits::difference_type;
+ using pointer = typename __alloc_traits::pointer;
+ using const_pointer = typename __alloc_traits::const_pointer;
+ using iterator = pointer;
+ using const_iterator = const_pointer;
+ using __sentinel_type _LIBCPP_NODEBUG = size_type;
+
+public:
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_size_layout() = default;
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_size_layout(const allocator_type& __alloc)
+ : __alloc_(__alloc) {}
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT {
+ return __front_cap_;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __begin_ + __size_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __begin_ + __size_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { return __cap_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT {
+ return __alloc_;
+ }
+
+ // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction,
+ // not explicit types.
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT {
+ return __size_;
+ }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT {
+ return __cap_;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT {
+ __front_cap_ = __new_first;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT {
+ // Size-based __split_buffers track their size directly: we need to explicitly update the size
+ // when the front is adjusted.
+ __size_ -= __new_begin - __begin_;
+ __begin_ = __new_begin;
+ __set_sentinel(__new_end);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT {
+ // Size-based __split_buffers track their size directly: we need to explicitly update the size
+ // when the front is adjusted.
+ __size_ -= __new_begin - __begin_;
+ __begin_ = __new_begin;
+ __set_sentinel(__new_size);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT {
+ _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_");
+ __size_ += __new_end - end();
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT {
+ __size_ = __new_size;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT {
+ __cap_ = __new_capacity;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT {
+ __cap_ = __new_capacity - __begin_;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT {
+ return static_cast<size_type>(__begin_ - __front_cap_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT {
+    // `__cap_ - __size_` gives the total spare capacity (front spare + back spare) when in
+    // size-mode. We need to remove the __front_spare from that count to get the back spare.
+ return __cap_ - __size_ - __front_spare();
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return __begin_[__size_ - 1]; }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
+ return __begin_[__size_ - 1];
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator(
+ __split_buffer_pointer_layout<__split_buffer<value_type, __alloc_rr&, __split_buffer_pointer_layout>,
+ value_type,
+ __alloc_rr&>& __other) _NOEXCEPT {
+ std::swap(__front_cap_, __other.__front_cap_);
+ std::swap(__begin_, __other.__begin_);
+ std::swap(__cap_, __other.__cap_);
+ std::swap(__size_, __other.__size_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_size_layout& __other) _NOEXCEPT {
+ std::swap(__front_cap_, __other.__front_cap_);
+ std::swap(__begin_, __other.__begin_);
+ std::swap(__cap_, __other.__cap_);
+ std::swap(__size_, __other.__size_);
+ std::__swap_allocator(__alloc_, __other.__alloc_);
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT {
+ __front_cap_ = nullptr;
+ __begin_ = nullptr;
+ __size_ = 0;
+ __cap_ = 0;
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __copy_without_alloc(__split_buffer_size_layout const& __other)
+ _NOEXCEPT_(is_nothrow_copy_assignable<pointer>::value) {
+ __front_cap_ = __other.__front_cap_;
+ __begin_ = __other.__begin_;
+ __cap_ = __other.__cap_;
+ __size_ = __other.__size_;
+ }
+
+private:
+ pointer __front_cap_ = nullptr;
+ pointer __begin_ = nullptr;
+ size_type __size_ = 0;
+ size_type __cap_ = 0;
+ _LIBCPP_NO_UNIQUE_ADDRESS allocator_type __alloc_;
+
+ template <class, class, class>
+ friend class __split_buffer_size_layout;
+};
+
+// `__split_buffer` is a contiguous array data structure. It may hold spare capacity at both ends of
+// the sequence. This allows for a `__split_buffer` to grow from both the front and the back without
+// relocating its contents until it runs out of room. This characteristic sets it apart from
+// `std::vector`, which only holds spare capacity at its end. As such, `__split_buffer` is useful
+// for implementing both `std::vector` and `std::deque`.
+//
+// The sequence is stored as a contiguous chunk of memory delimited by the following "pointers" (`o` denotes
+// uninitialized memory and `x` denotes a valid object):
+//
+// |oooooooooooooooooooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxoooooooooooooooooooooooo|
+// ^ ^ ^ ^
+// __front_cap_ __begin_ __end_ __back_cap_
+//
+// The range [__front_cap_, __begin_) contains uninitialized memory. It is referred to as the "front spare capacity".
+// The range [__begin_, __end_) contains valid objects. It is referred to as the "valid range".
+// The range [__end_, __back_cap_) contains uninitialized memory. It is referred to as the "back spare capacity".
+//
+// The layout of `__split_buffer` is determined by the `_Layout` template template parameter. This
+// `_Layout` allows the above pointers to be stored as different representations, such as integer
+// offsets. A layout class template must provide the following interface:
+//
+// template<class _SplitBuffer, class _Tp, class _Allocator>
+// class __layout {
+// protected:
+// using value_type = _Tp;
+// using allocator_type = _Allocator;
+// using __alloc_rr = __libcpp_remove_reference_t<allocator_type>;
+// using __alloc_traits = allocator_traits<__alloc_rr>;
+// using reference = value_type&;
+// using const_reference = const value_type&;
+// using size_type = typename __alloc_traits::size_type;
+// using difference_type = typename __alloc_traits::difference_type;
+// using pointer = typename __alloc_traits::pointer;
+// using const_pointer = typename __alloc_traits::const_pointer;
+// using iterator = pointer;
+// using const_iterator = const_pointer;
+// using __sentinel_type = /* type that represents the layout's sentinel */;
+//
+// public:
+// __layout() = default;
+// explicit __layout(const allocator_type&);
+//
+// pointer __front_cap();
+// const_pointer __front_cap() const;
+//
+// pointer begin();
+// const_pointer begin() const;
+//
+// pointer end();
+// pointer end() const;
+//
+// size_type size() const;
+// bool empty() const;
+// size_type capacity() const;
+//
+// allocator_type& __get_allocator();
+// allocator_type const& __get_allocator() const;
+//
+// __sentinel_type __raw_sentinel() const;
+// __sentinel_type __raw_capacity() const;
+//
+// void __set_data(pointer);
+// void __set_valid_range(pointer __begin, pointer __end);
+// void __set_valid_range(pointer __begin, size_type __size);
+// void __set_sentinel(pointer __end);
+// void __set_sentinel(size_type __size);
+//
+// void __set_capacity(size_type __capacity);
+// void __set_capacity(pointer __capacity);
+//
+// size_type __front_spare() const;
+// size_type __back_spare() const;
+//
+// reference back();
+// const_reference back() const;
+//
+// template<class _OtherLayout>
+// void __swap_without_allocator(_OtherLayout&);
+// void swap(__layout&);
+//
+// void __reset();
+// void __copy_without_alloc(__layout const&);
+// };
+//
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+class __split_buffer : _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator> {
+ using __base_type _LIBCPP_NODEBUG = _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator>;
+
+public:
+ using __base_type::__back_spare;
+ using __base_type::__copy_without_alloc;
+ using __base_type::__front_cap;
+ using __base_type::__front_spare;
+ using __base_type::__get_allocator;
+ using __base_type::__raw_capacity;
+ using __base_type::__raw_sentinel;
+ using __base_type::__reset;
+ using __base_type::__set_capacity;
+ using __base_type::__set_data;
+ using __base_type::__set_sentinel;
+ using __base_type::__set_valid_range;
+
+ using typename __base_type::__alloc_rr;
+ using typename __base_type::__alloc_traits;
+ using typename __base_type::allocator_type;
+ using typename __base_type::const_iterator;
+ using typename __base_type::const_pointer;
+ using typename __base_type::const_reference;
+ using typename __base_type::difference_type;
+ using typename __base_type::iterator;
+ using typename __base_type::pointer;
+ using typename __base_type::reference;
+ using typename __base_type::size_type;
+ using typename __base_type::value_type;
// A __split_buffer contains the following members which may be trivially relocatable:
// - pointer: may be trivially relocatable, so it's checked
@@ -78,23 +489,15 @@ public:
__split_buffer,
void>;
- pointer __first_;
- pointer __begin_;
- pointer __end_;
- _LIBCPP_COMPRESSED_PAIR(pointer, __cap_, allocator_type, __alloc_);
-
__split_buffer(const __split_buffer&) = delete;
__split_buffer& operator=(const __split_buffer&) = delete;
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer()
- _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
- : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr) {}
+ _LIBCPP_HIDE_FROM_ABI __split_buffer() = default;
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a)
- : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {}
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a) : __base_type(__a) {}
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const __alloc_rr& __a)
- : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {}
+ : __base_type(__a) {}
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a);
@@ -111,36 +514,16 @@ public:
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~__split_buffer();
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __begin_; }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __begin_; }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __end_; }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __end_; }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(__begin_); }
+ using __base_type::back;
+ using __base_type::begin;
+ using __base_type::capacity;
+ using __base_type::empty;
+ using __base_type::end;
+ using __base_type::size;
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const {
- return static_cast<size_type>(__end_ - __begin_);
- }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const { return __end_ == __begin_; }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const {
- return static_cast<size_type>(__cap_ - __first_);
- }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const {
- return static_cast<size_type>(__begin_ - __first_);
- }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const {
- return static_cast<size_type>(__cap_ - __end_);
- }
-
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *__begin_; }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *__begin_; }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() { return *(__end_ - 1); }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const { return *(__end_ - 1); }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(begin()); }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *begin(); }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *begin(); }
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT;
@@ -149,8 +532,8 @@ public:
template <class... _Args>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args);
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(__begin_ + 1); }
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(__end_ - 1); }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(begin() + 1); }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(end() - 1); }
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n);
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n, const_reference __x);
@@ -184,242 +567,240 @@ public:
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer& __x)
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>);
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const;
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const {
+ if (__front_cap() == nullptr) {
+ if (begin() != nullptr)
+ return false;
+
+ if (!empty())
+ return false;
+
+ if (capacity() != 0)
+ return false;
+
+ return true;
+ } else {
+ if (begin() < __front_cap())
+ return false;
+
+ if (capacity() < size())
+ return false;
+
+ if (end() < begin())
+ return false;
+
+ return true;
+ }
+ }
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+ __swap_without_allocator(__split_buffer<value_type, __alloc_rr&, _Layout>& __other) _NOEXCEPT {
+ __base_type::__swap_without_allocator(__other);
+ }
private:
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value) {
- __alloc_ = std::move(__c.__alloc_);
+ __get_allocator() = std::move(__c.__get_allocator());
}
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT {}
struct _ConstructTransaction {
_LIBCPP_CONSTEXPR_SINCE_CXX20
- _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT
- : __pos_(*__p),
- __end_(*__p + __n),
- __dest_(__p) {}
+ _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(__split_buffer* __parent, pointer __p, size_type __n) _NOEXCEPT
+ : __pos_(__p),
+ __end_(__p + __n),
+ __parent_(__parent) {}
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { *__dest_ = __pos_; }
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { __parent_->__set_sentinel(__pos_); }
pointer __pos_;
const pointer __end_;
private:
- pointer* __dest_;
+ __split_buffer* __parent_;
};
-};
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 bool __split_buffer<_Tp, _Allocator>::__invariants() const {
- if (__first_ == nullptr) {
- if (__begin_ != nullptr)
- return false;
- if (__end_ != nullptr)
- return false;
- if (__cap_ != nullptr)
- return false;
- } else {
- if (__begin_ < __first_)
- return false;
- if (__end_ < __begin_)
- return false;
- if (__cap_ < __end_)
- return false;
- }
- return true;
-}
+ template <class _T2, class _A2, template <class, class, class> class _L2>
+ friend class __split_buffer;
+};
-// Default constructs __n objects starting at __end_
+// Default constructs __n objects starting at `end()`
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == size() + __n
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n) {
- _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n) {
+ _ConstructTransaction __tx(this, end(), __n);
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
- __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_));
+ __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_));
}
}
-// Copy constructs __n objects starting at __end_ from __x
+// Copy constructs __n objects starting at `end()` from __x
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == old size() + __n
// Postcondition: [i] == __x for all i in [size() - __n, __n)
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) {
- _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n, const_reference __x) {
+ _ConstructTransaction __tx(this, end(), __n);
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
- __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), __x);
+ __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), __x);
}
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) {
- __alloc_rr& __a = __alloc_;
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) {
+ __alloc_rr& __a = __get_allocator();
for (; __first != __last; ++__first) {
- if (__end_ == __cap_) {
- size_type __old_cap = __cap_ - __first_;
+ if (__back_spare() == 0) {
+ size_type __old_cap = capacity();
size_type __new_cap = std::max<size_type>(2 * __old_cap, 8);
__split_buffer __buf(__new_cap, 0, __a);
- for (pointer __p = __begin_; __p != __end_; ++__p, (void)++__buf.__end_)
- __alloc_traits::construct(__buf.__alloc_, std::__to_address(__buf.__end_), std::move(*__p));
+ pointer __buf_end = __buf.end();
+ pointer __end = end();
+ for (pointer __p = begin(); __p != __end; ++__p) {
+ __alloc_traits::construct(__buf.__get_allocator(), std::__to_address(__buf_end), std::move(*__p));
+ __buf.__set_sentinel(++__buf_end);
+ }
swap(__buf);
}
- __alloc_traits::construct(__a, std::__to_address(this->__end_), *__first);
- ++this->__end_;
+
+ __alloc_traits::construct(__a, std::__to_address(end()), *__first);
+ __set_sentinel(size() + 1);
}
}
-template <class _Tp, class _Allocator>
+
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> >
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) {
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) {
__construct_at_end_with_size(__first, std::distance(__first, __last));
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
template <class _ForwardIterator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) {
- _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) {
+ _ConstructTransaction __tx(this, end(), __n);
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__first) {
- __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), *__first);
+ __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), *__first);
}
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, false_type) {
- while (__begin_ != __new_begin)
- __alloc_traits::destroy(__alloc_, std::__to_address(__begin_++));
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, false_type) {
+ pointer __begin = begin();
+ // Updating begin at every iteration is unnecessary because destruction can't throw.
+ while (__begin != __new_begin)
+ __alloc_traits::destroy(__get_allocator(), std::__to_address(__begin++));
+ __set_valid_range(__begin, end());
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, true_type) {
- __begin_ = __new_begin;
-}
-
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT {
- while (__new_last != __end_)
- __alloc_traits::destroy(__alloc_, std::__to_address(--__end_));
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, true_type) {
+ __set_valid_range(__new_begin, end());
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, true_type) _NOEXCEPT {
- __end_ = __new_last;
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT {
+ pointer __end = end();
+ // Updating begin at every iteration is unnecessary because destruction can't throw.
+ while (__new_last != __end)
+ __alloc_traits::destroy(__get_allocator(), std::__to_address(--__end));
+ __set_sentinel(__end);
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20
-__split_buffer<_Tp, _Allocator>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a)
- : __cap_(nullptr), __alloc_(__a) {
- if (__cap == 0) {
- __first_ = nullptr;
- } else {
- auto __allocation = std::__allocate_at_least(__alloc_, __cap);
- __first_ = __allocation.ptr;
- __cap = __allocation.count;
+__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a)
+ : __base_type(__a) {
+ _LIBCPP_ASSERT_INTERNAL(__cap >= __start, "can't have a start point outside the capacity");
+ if (__cap > 0) {
+ auto __allocation = std::__allocate_at_least(__get_allocator(), __cap);
+ __set_data(__allocation.ptr);
+ __cap = __allocation.count;
}
- __begin_ = __end_ = __first_ + __start;
- __cap_ = __first_ + __cap;
+
+ pointer __begin = __front_cap() + __start;
+ __set_valid_range(__begin, __begin);
+ __set_capacity(__cap);
}
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::~__split_buffer() {
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::~__split_buffer() {
clear();
- if (__first_)
- __alloc_traits::deallocate(__alloc_, __first_, capacity());
+ if (__front_cap())
+ __alloc_traits::deallocate(__get_allocator(), __front_cap(), capacity());
}
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c)
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c)
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
- : __first_(std::move(__c.__first_)),
- __begin_(std::move(__c.__begin_)),
- __end_(std::move(__c.__end_)),
- __cap_(std::move(__c.__cap_)),
- __alloc_(std::move(__c.__alloc_)) {
- __c.__first_ = nullptr;
- __c.__begin_ = nullptr;
- __c.__end_ = nullptr;
- __c.__cap_ = nullptr;
+ : __base_type(std::move(__c)) {
+ __c.__reset();
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20
-__split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a)
- : __cap_(nullptr), __alloc_(__a) {
- if (__a == __c.__alloc_) {
- __first_ = __c.__first_;
- __begin_ = __c.__begin_;
- __end_ = __c.__end_;
- __cap_ = __c.__cap_;
- __c.__first_ = nullptr;
- __c.__begin_ = nullptr;
- __c.__end_ = nullptr;
- __c.__cap_ = nullptr;
+__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a)
+ : __base_type(__a) {
+ if (__a == __c.__get_allocator()) {
+ __set_data(__c.__front_cap());
+ __set_valid_range(__c.begin(), __c.end());
+ __set_capacity(__c.capacity());
+ __c.__reset();
} else {
- auto __allocation = std::__allocate_at_least(__alloc_, __c.size());
- __first_ = __allocation.ptr;
- __begin_ = __end_ = __first_;
- __cap_ = __first_ + __allocation.count;
+ auto __allocation = std::__allocate_at_least(__get_allocator(), __c.size());
+ __set_data(__allocation.ptr);
+ __set_valid_range(__front_cap(), __front_cap());
+ __set_capacity(__allocation.count);
typedef move_iterator<iterator> _Ip;
__construct_at_end(_Ip(__c.begin()), _Ip(__c.end()));
}
}
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>&
-__split_buffer<_Tp, _Allocator>::operator=(__split_buffer&& __c)
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>&
+__split_buffer<_Tp, _Allocator, _Layout>::operator=(__split_buffer&& __c)
_NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value &&
is_nothrow_move_assignable<allocator_type>::value) ||
!__alloc_traits::propagate_on_container_move_assignment::value) {
clear();
shrink_to_fit();
- __first_ = __c.__first_;
- __begin_ = __c.__begin_;
- __end_ = __c.__end_;
- __cap_ = __c.__cap_;
+ __copy_without_alloc(__c);
__move_assign_alloc(__c, integral_constant<bool, __alloc_traits::propagate_on_container_move_assignment::value>());
- __c.__first_ = __c.__begin_ = __c.__end_ = __c.__cap_ = nullptr;
+ __c.__reset();
return *this;
}
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::swap(__split_buffer& __x)
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::swap(__split_buffer& __x)
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>) {
- std::swap(__first_, __x.__first_);
- std::swap(__begin_, __x.__begin_);
- std::swap(__end_, __x.__end_);
- std::swap(__cap_, __x.__cap_);
- std::__swap_allocator(__alloc_, __x.__alloc_);
+ __base_type::swap(__x);
}
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT {
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::shrink_to_fit() _NOEXCEPT {
if (capacity() > size()) {
#if _LIBCPP_HAS_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_EXCEPTIONS
- __split_buffer<value_type, __alloc_rr&> __t(size(), 0, __alloc_);
+ __split_buffer<value_type, __alloc_rr&, _Layout> __t(size(), 0, __get_allocator());
if (__t.capacity() < capacity()) {
- __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
- __t.__end_ = __t.__begin_ + (__end_ - __begin_);
- std::swap(__first_, __t.__first_);
- std::swap(__begin_, __t.__begin_);
- std::swap(__end_, __t.__end_);
- std::swap(__cap_, __t.__cap_);
+ __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(end()));
+ __t.__set_sentinel(size());
+ __swap_without_allocator(__t);
}
#if _LIBCPP_HAS_EXCEPTIONS
} catch (...) {
@@ -428,55 +809,56 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fi
}
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
template <class... _Args>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_front(_Args&&... __args) {
- if (__begin_ == __first_) {
- if (__end_ < __cap_) {
- difference_type __d = __cap_ - __end_;
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_front(_Args&&... __args) {
+ if (__front_spare() == 0) {
+ pointer __end = end();
+ if (__back_spare() > 0) {
+ // The elements are pressed up against the front of the buffer: we need to move them back a
+ // little bit to make `emplace_front` have amortised O(1) complexity.
+ difference_type __d = __back_spare();
__d = (__d + 1) / 2;
- __begin_ = std::move_backward(__begin_, __end_, __end_ + __d);
- __end_ += __d;
+ auto __new_end = __end + __d;
+ __set_valid_range(std::move_backward(begin(), __end, __new_end), __new_end);
} else {
- size_type __c = std::max<size_type>(2 * static_cast<size_type>(__cap_ - __first_), 1);
- __split_buffer<value_type, __alloc_rr&> __t(__c, (__c + 3) / 4, __alloc_);
- __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
- std::swap(__first_, __t.__first_);
- std::swap(__begin_, __t.__begin_);
- std::swap(__end_, __t.__end_);
- std::swap(__cap_, __t.__cap_);
+ size_type __c = std::max<size_type>(2 * capacity(), 1);
+ __split_buffer<value_type, __alloc_rr&, _Layout> __t(__c, (__c + 3) / 4, __get_allocator());
+ __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(__end));
+ __base_type::__swap_without_allocator(__t);
}
}
- __alloc_traits::construct(__alloc_, std::__to_address(__begin_ - 1), std::forward<_Args>(__args)...);
- --__begin_;
+
+ __alloc_traits::construct(__get_allocator(), std::__to_address(begin() - 1), std::forward<_Args>(__args)...);
+ __set_valid_range(begin() - 1, size() + 1);
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
template <class... _Args>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_back(_Args&&... __args) {
- if (__end_ == __cap_) {
- if (__begin_ > __first_) {
- difference_type __d = __begin_ - __first_;
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_back(_Args&&... __args) {
+ pointer __end = end();
+ if (__back_spare() == 0) {
+ if (__front_spare() > 0) {
+ difference_type __d = __front_spare();
__d = (__d + 1) / 2;
- __end_ = std::move(__begin_, __end_, __begin_ - __d);
- __begin_ -= __d;
+ __end = std::move(begin(), __end, begin() - __d);
+ __set_valid_range(begin() - __d, __end);
} else {
- size_type __c = std::max<size_type>(2 * static_cast<size_type>(__cap_ - __first_), 1);
- __split_buffer<value_type, __alloc_rr&> __t(__c, __c / 4, __alloc_);
- __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
- std::swap(__first_, __t.__first_);
- std::swap(__begin_, __t.__begin_);
- std::swap(__end_, __t.__end_);
- std::swap(__cap_, __t.__cap_);
+ size_type __c = std::max<size_type>(2 * capacity(), 1);
+ __split_buffer<value_type, __alloc_rr&, _Layout> __t(__c, __c / 4, __get_allocator());
+ __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(__end));
+ __base_type::__swap_without_allocator(__t);
}
}
- __alloc_traits::construct(__alloc_, std::__to_address(__end_), std::forward<_Args>(__args)...);
- ++__end_;
+
+ __alloc_traits::construct(__get_allocator(), std::__to_address(__end), std::forward<_Args>(__args)...);
+ __set_sentinel(++__end);
}
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-swap(__split_buffer<_Tp, _Allocator>& __x, __split_buffer<_Tp, _Allocator>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
+swap(__split_buffer<_Tp, _Allocator, _Layout>& __x, __split_buffer<_Tp, _Allocator, _Layout>& __y)
+ _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
__x.swap(__y);
}
diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h
index b67baae..029b3c63 100644
--- a/libcxx/include/__type_traits/desugars_to.h
+++ b/libcxx/include/__type_traits/desugars_to.h
@@ -10,6 +10,7 @@
#define _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H
#include <__config>
+#include <__type_traits/integral_constant.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -64,6 +65,9 @@ template <class _CanonicalTag, class _Operation, class... _Args>
inline const bool __desugars_to_v<_CanonicalTag, _Operation&&, _Args...> =
__desugars_to_v<_CanonicalTag, _Operation, _Args...>;
+template <class _CanonicalTag, class _Operation, class... _Args>
+struct __desugars_to : integral_constant<bool, __desugars_to_v<_CanonicalTag, _Operation, _Args...> > {};
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H
diff --git a/libcxx/include/__utility/default_three_way_comparator.h b/libcxx/include/__utility/default_three_way_comparator.h
index ce423c6..438ab55 100644
--- a/libcxx/include/__utility/default_three_way_comparator.h
+++ b/libcxx/include/__utility/default_three_way_comparator.h
@@ -27,9 +27,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _LHS, class _RHS, class = void>
struct __default_three_way_comparator;
-template <class _Tp>
-struct __default_three_way_comparator<_Tp, _Tp, __enable_if_t<is_arithmetic<_Tp>::value> > {
- _LIBCPP_HIDE_FROM_ABI static int operator()(_Tp __lhs, _Tp __rhs) {
+template <class _LHS, class _RHS>
+struct __default_three_way_comparator<_LHS,
+ _RHS,
+ __enable_if_t<is_arithmetic<_LHS>::value && is_arithmetic<_RHS>::value> > {
+ _LIBCPP_HIDE_FROM_ABI static int operator()(_LHS __lhs, _RHS __rhs) {
if (__lhs < __rhs)
return -1;
if (__lhs > __rhs)
@@ -38,12 +40,30 @@ struct __default_three_way_comparator<_Tp, _Tp, __enable_if_t<is_arithmetic<_Tp>
}
};
+#if _LIBCPP_STD_VER >= 20 && __has_builtin(__builtin_lt_synthesises_from_spaceship)
+template <class _LHS, class _RHS>
+struct __default_three_way_comparator<
+ _LHS,
+ _RHS,
+ __enable_if_t<!(is_arithmetic<_LHS>::value && is_arithmetic<_RHS>::value) &&
+ __builtin_lt_synthesises_from_spaceship(const _LHS&, const _RHS&)>> {
+ _LIBCPP_HIDE_FROM_ABI static int operator()(const _LHS& __lhs, const _RHS& __rhs) {
+ auto __res = __lhs <=> __rhs;
+ if (__res < 0)
+ return -1;
+ if (__res > 0)
+ return 1;
+ return 0;
+ }
+};
+#endif
+
template <class _LHS, class _RHS, bool = true>
-inline const bool __has_default_three_way_comparator_v = false;
+struct __has_default_three_way_comparator : false_type {};
template <class _LHS, class _RHS>
-inline const bool
- __has_default_three_way_comparator_v< _LHS, _RHS, sizeof(__default_three_way_comparator<_LHS, _RHS>) >= 0> = true;
+struct __has_default_three_way_comparator<_LHS, _RHS, sizeof(__default_three_way_comparator<_LHS, _RHS>) >= 0>
+ : true_type {};
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__utility/lazy_synth_three_way_comparator.h b/libcxx/include/__utility/lazy_synth_three_way_comparator.h
index ca98845..9105d05 100644
--- a/libcxx/include/__utility/lazy_synth_three_way_comparator.h
+++ b/libcxx/include/__utility/lazy_synth_three_way_comparator.h
@@ -10,6 +10,7 @@
#define _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H
#include <__config>
+#include <__type_traits/conjunction.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__utility/default_three_way_comparator.h>
@@ -69,11 +70,12 @@ struct __eager_compare_result {
};
template <class _Comparator, class _LHS, class _RHS>
-struct __lazy_synth_three_way_comparator<_Comparator,
- _LHS,
- _RHS,
- __enable_if_t<__desugars_to_v<__less_tag, _Comparator, _LHS, _RHS> &&
- __has_default_three_way_comparator_v<_LHS, _RHS> > > {
+struct __lazy_synth_three_way_comparator<
+ _Comparator,
+ _LHS,
+ _RHS,
+ __enable_if_t<_And<__desugars_to<__less_tag, _Comparator, _LHS, _RHS>,
+ __has_default_three_way_comparator<_LHS, _RHS> >::value> > {
// This lifetimebound annotation is technically incorrect, but other specializations actually capture the lifetime of
// the comparator.
_LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {}
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 4307e78..5e6572b 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -86,6 +86,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Allocator /* = allocator<_Tp> */>
class vector {
+ template <class _Up, class _Alloc>
+ using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>;
+
public:
//
// Types
@@ -820,6 +823,24 @@ private:
__add_alignment_assumption(_Ptr __p) _NOEXCEPT {
return __p;
}
+
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_layouts(__split_buffer<_Tp, allocator_type&>& __sb) {
+ auto __vector_begin = __begin_;
+ auto __vector_sentinel = __end_;
+ auto __vector_cap = __cap_;
+
+ auto __sb_begin = __sb.begin();
+ auto __sb_sentinel = __sb.__raw_sentinel();
+ auto __sb_cap = __sb.__raw_capacity();
+
+ // TODO: replace with __set_valid_range and __set_capacity when vector supports it.
+ __begin_ = __sb_begin;
+ __end_ = __sb_sentinel;
+ __cap_ = __sb_cap;
+
+ __sb.__set_valid_range(__vector_begin, __vector_sentinel);
+ __sb.__set_capacity(__vector_cap);
+ }
};
#if _LIBCPP_STD_VER >= 17
@@ -850,15 +871,14 @@ template <class _Tp, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v) {
__annotate_delete();
- auto __new_begin = __v.__begin_ - (__end_ - __begin_);
+ auto __new_begin = __v.begin() - size();
std::__uninitialized_allocator_relocate(
this->__alloc_, std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin));
- __v.__begin_ = __new_begin;
+ __v.__set_valid_range(__new_begin, __v.end());
__end_ = __begin_; // All the objects have been destroyed by relocating them.
- std::swap(this->__begin_, __v.__begin_);
- std::swap(this->__end_, __v.__end_);
- std::swap(this->__cap_, __v.__cap_);
- __v.__first_ = __v.__begin_;
+
+ __swap_layouts(__v);
+ __v.__set_data(__v.begin());
__annotate_new(size());
}
@@ -870,25 +890,23 @@ template <class _Tp, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p) {
__annotate_delete();
- pointer __ret = __v.__begin_;
+ pointer __ret = __v.begin();
// Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_)
// in case something in [__begin_, __p) throws.
std::__uninitialized_allocator_relocate(
- this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_));
- __v.__end_ += (__end_ - __p);
+ this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.end()));
+ auto __relocated_so_far = __end_ - __p;
+ __v.__set_sentinel(__v.end() + __relocated_so_far);
__end_ = __p; // The objects in [__p, __end_) have been destroyed by relocating them.
- auto __new_begin = __v.__begin_ - (__p - __begin_);
+ auto __new_begin = __v.begin() - (__p - __begin_);
std::__uninitialized_allocator_relocate(
this->__alloc_, std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin));
- __v.__begin_ = __new_begin;
- __end_ = __begin_; // All the objects have been destroyed by relocating them.
-
- std::swap(this->__begin_, __v.__begin_);
- std::swap(this->__end_, __v.__end_);
- std::swap(this->__cap_, __v.__cap_);
- __v.__first_ = __v.__begin_;
+ __v.__set_valid_range(__new_begin, __v.end());
+ __end_ = __begin_; // All the objects have been destroyed by relocating them.
+ __swap_layouts(__v);
+ __v.__set_data(__v.begin());
__annotate_new(size());
return __ret;
}
@@ -1136,8 +1154,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) {
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), this->__alloc_);
// __v.emplace_back(std::forward<_Args>(__args)...);
- __alloc_traits::construct(this->__alloc_, std::__to_address(__v.__end_), std::forward<_Args>(__args)...);
- __v.__end_++;
+ pointer __end = __v.end();
+ __alloc_traits::construct(this->__alloc_, std::__to_address(__end), std::forward<_Args>(__args)...);
+ __v.__set_sentinel(++__end);
__swap_out_circular_buffer(__v);
return this->__end_;
}
@@ -1312,14 +1331,14 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu
__split_buffer<value_type, allocator_type&> __merged(
__recommend(size() + __v.size()), __off, __alloc_); // has `__off` positions available at the front
std::__uninitialized_allocator_relocate(
- __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.__end_));
+ __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.end()));
__guard.__complete(); // Release the guard once objects in [__old_last_, __end_) have been successfully relocated.
- __merged.__end_ += this->__end_ - __old_last;
+ __merged.__set_sentinel(__merged.end() + (this->__end_ - __old_last));
this->__end_ = __old_last;
std::__uninitialized_allocator_relocate(
- __alloc_, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_));
- __merged.__end_ += __v.size();
- __v.__end_ = __v.__begin_;
+ __alloc_, std::__to_address(__v.begin()), std::__to_address(__v.end()), std::__to_address(__merged.end()));
+ __merged.__set_sentinel(__merged.size() + __v.size());
+ __v.__set_sentinel(__v.begin());
__p = __swap_out_circular_buffer(__merged, __p);
}
return __make_iter(__p);
diff --git a/libcxx/include/deque b/libcxx/include/deque
index 395a107..98d1dbb 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -487,6 +487,9 @@ const _DiffType __deque_iterator<_ValueType, _Pointer, _Reference, _MapPointer,
template <class _Tp, class _Allocator /*= allocator<_Tp>*/>
class deque {
+ template <class _Up, class _Alloc>
+ using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>;
+
public:
// types:
@@ -1238,8 +1241,8 @@ private:
clear();
shrink_to_fit();
}
- __alloc() = __c.__alloc();
- __map_.__alloc_ = __c.__map_.__alloc_;
+ __alloc() = __c.__alloc();
+ __map_.__get_allocator() = __c.__map_.__get_allocator();
}
_LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const deque&, false_type) {}
@@ -1318,7 +1321,7 @@ deque<_Tp, _Allocator>::deque(const deque& __c)
: __map_(__pointer_allocator(__alloc_traits::select_on_container_copy_construction(__c.__alloc()))),
__start_(0),
__size_(0),
- __alloc_(__map_.__alloc_) {
+ __alloc_(__map_.__get_allocator()) {
__annotate_new(0);
__append(__c.begin(), __c.end());
}
@@ -2071,7 +2074,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() {
// Else need to allocate 1 buffer, *and* we need to reallocate __map_.
else {
__split_buffer<pointer, __pointer_allocator&> __buf(
- std::max<size_type>(2 * __map_.capacity(), 1), 0, __map_.__alloc_);
+ std::max<size_type>(2 * __map_.capacity(), 1), 0, __map_.__get_allocator());
typedef __allocator_destructor<_Allocator> _Dp;
unique_ptr<pointer, _Dp> __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size));
@@ -2080,10 +2083,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() {
for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i)
__buf.emplace_back(*__i);
- std::swap(__map_.__first_, __buf.__first_);
- std::swap(__map_.__begin_, __buf.__begin_);
- std::swap(__map_.__end_, __buf.__end_);
- std::swap(__map_.__cap_, __buf.__cap_);
+ __map_.__swap_without_allocator(__buf);
__start_ = __map_.size() == 1 ? __block_size / 2 : __start_ + __block_size;
}
__annotate_whole_block(0, __asan_poison);
@@ -2134,7 +2134,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) {
else {
size_type __ds = (__nb + __back_capacity) * __block_size - __map_.empty();
__split_buffer<pointer, __pointer_allocator&> __buf(
- std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__alloc_);
+ std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__get_allocator());
# if _LIBCPP_HAS_EXCEPTIONS
try {
# endif // _LIBCPP_HAS_EXCEPTIONS
@@ -2157,10 +2157,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) {
}
for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i)
__buf.emplace_back(*__i);
- std::swap(__map_.__first_, __buf.__first_);
- std::swap(__map_.__begin_, __buf.__begin_);
- std::swap(__map_.__end_, __buf.__end_);
- std::swap(__map_.__cap_, __buf.__cap_);
+ __map_.__swap_without_allocator(__buf);
__start_ += __ds;
}
}
@@ -2194,7 +2191,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() {
// Else need to allocate 1 buffer, *and* we need to reallocate __map_.
else {
__split_buffer<pointer, __pointer_allocator&> __buf(
- std::max<size_type>(2 * __map_.capacity(), 1), __map_.size(), __map_.__alloc_);
+ std::max<size_type>(2 * __map_.capacity(), 1), __map_.size(), __map_.__get_allocator());
typedef __allocator_destructor<_Allocator> _Dp;
unique_ptr<pointer, _Dp> __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size));
@@ -2203,10 +2200,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() {
for (__map_pointer __i = __map_.end(); __i != __map_.begin();)
__buf.emplace_front(*--__i);
- std::swap(__map_.__first_, __buf.__first_);
- std::swap(__map_.__begin_, __buf.__begin_);
- std::swap(__map_.__end_, __buf.__end_);
- std::swap(__map_.__cap_, __buf.__cap_);
+ __map_.__swap_without_allocator(__buf);
__annotate_whole_block(__map_.size() - 1, __asan_poison);
}
}
@@ -2259,7 +2253,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) {
__split_buffer<pointer, __pointer_allocator&> __buf(
std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()),
__map_.size() - __front_capacity,
- __map_.__alloc_);
+ __map_.__get_allocator());
# if _LIBCPP_HAS_EXCEPTIONS
try {
# endif // _LIBCPP_HAS_EXCEPTIONS
@@ -2282,10 +2276,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) {
}
for (__map_pointer __i = __map_.end(); __i != __map_.begin();)
__buf.emplace_front(*--__i);
- std::swap(__map_.__first_, __buf.__first_);
- std::swap(__map_.__begin_, __buf.__begin_);
- std::swap(__map_.__end_, __buf.__end_);
- std::swap(__map_.__cap_, __buf.__cap_);
+ __map_.__swap_without_allocator(__buf);
__start_ -= __ds;
}
}
diff --git a/libcxx/include/string b/libcxx/include/string
index 0abdfeb..f13a764 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -2521,6 +2521,7 @@ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_DECLARE, wchar_t)
# endif
# undef _LIBCPP_DECLARE
+# if _LIBCPP_STD_VER <= 17 || !__has_builtin(__builtin_lt_synthesises_from_spaceship)
template <class _CharT, class _Traits, class _Alloc>
struct __default_three_way_comparator<basic_string<_CharT, _Traits, _Alloc>, basic_string<_CharT, _Traits, _Alloc> > {
using __string_t _LIBCPP_NODEBUG = basic_string<_CharT, _Traits, _Alloc>;
@@ -2533,6 +2534,7 @@ struct __default_three_way_comparator<basic_string<_CharT, _Traits, _Alloc>, bas
return __ret;
}
};
+# endif
# if _LIBCPP_STD_VER >= 17
template <class _InputIterator,
diff --git a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp b/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp
index 546240a..c04e944 100644
--- a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp
+++ b/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp
@@ -133,16 +133,58 @@ static_assert(!std::__is_replaceable<CustomMoveAssignment>::value, "");
// ----------------------
// __split_buffer
-static_assert(std::__is_replaceable<std::__split_buffer<int> >::value, "");
-static_assert(std::__is_replaceable<std::__split_buffer<NotTriviallyCopyable> >::value, "");
-static_assert(!std::__is_replaceable<std::__split_buffer<int, NonPropagatingStatefulCopyAssignAlloc<int> > >::value,
- "");
-static_assert(!std::__is_replaceable<std::__split_buffer<int, NonPropagatingStatefulMoveAssignAlloc<int> > >::value,
- "");
-static_assert(std::__is_replaceable<std::__split_buffer<int, NonPropagatingStatelessCopyAssignAlloc<int> > >::value,
+static_assert(
+ std::__is_replaceable<std::__split_buffer<int, std::allocator<int>, std::__split_buffer_pointer_layout> >::value,
+ "");
+static_assert(std::__is_replaceable<std::__split_buffer<NotTriviallyCopyable,
+ std::allocator<NotTriviallyCopyable>,
+ std::__split_buffer_pointer_layout> >::value,
"");
-static_assert(std::__is_replaceable<std::__split_buffer<int, NonPropagatingStatelessMoveAssignAlloc<int> > >::value,
+static_assert(
+ !std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatefulCopyAssignAlloc<int>, std::__split_buffer_pointer_layout > >::
+ value,
+ "");
+static_assert(
+ !std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatefulMoveAssignAlloc<int>, std::__split_buffer_pointer_layout > >::
+ value,
+ "");
+static_assert(
+ std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatelessCopyAssignAlloc<int>, std::__split_buffer_pointer_layout > >::
+ value,
+ "");
+static_assert(
+ std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatelessMoveAssignAlloc<int>, std::__split_buffer_pointer_layout > >::
+ value,
+ "");
+
+static_assert(
+ std::__is_replaceable<std::__split_buffer<int, std::allocator<int>, std::__split_buffer_size_layout> >::value, "");
+static_assert(std::__is_replaceable<std::__split_buffer<NotTriviallyCopyable,
+ std::allocator<NotTriviallyCopyable>,
+ std::__split_buffer_size_layout> >::value,
"");
+static_assert(
+ !std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatefulCopyAssignAlloc<int>, std::__split_buffer_size_layout > >::value,
+ "");
+static_assert(
+ !std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatefulMoveAssignAlloc<int>, std::__split_buffer_size_layout > >::value,
+ "");
+static_assert(
+ std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatelessCopyAssignAlloc<int>, std::__split_buffer_size_layout > >::
+ value,
+ "");
+static_assert(
+ std::__is_replaceable<
+ std::__split_buffer<int, NonPropagatingStatelessMoveAssignAlloc<int>, std::__split_buffer_size_layout > >::
+ value,
+ "");
// standard library types
// ----------------------
diff --git a/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp b/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp
index c462672..10889eb 100644
--- a/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp
+++ b/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp
@@ -68,9 +68,27 @@ static_assert(!std::__libcpp_is_trivially_relocatable<NonTrivialDestructor>::val
// ----------------------
// __split_buffer
-static_assert(std::__libcpp_is_trivially_relocatable<std::__split_buffer<int> >::value, "");
-static_assert(std::__libcpp_is_trivially_relocatable<std::__split_buffer<NotTriviallyCopyable> >::value, "");
-static_assert(!std::__libcpp_is_trivially_relocatable<std::__split_buffer<int, test_allocator<int> > >::value, "");
+static_assert(std::__libcpp_is_trivially_relocatable<
+ std::__split_buffer<int, std::allocator<int>, std::__split_buffer_pointer_layout> >::value,
+ "");
+static_assert(std::__libcpp_is_trivially_relocatable<std::__split_buffer<NotTriviallyCopyable,
+ std::allocator<NotTriviallyCopyable>,
+ std::__split_buffer_pointer_layout> >::value,
+ "");
+static_assert(!std::__libcpp_is_trivially_relocatable<
+ std::__split_buffer<int, test_allocator<int>, std::__split_buffer_pointer_layout > >::value,
+ "");
+
+static_assert(std::__libcpp_is_trivially_relocatable<
+ std::__split_buffer<int, std::allocator<int>, std::__split_buffer_size_layout> >::value,
+ "");
+static_assert(std::__libcpp_is_trivially_relocatable<std::__split_buffer<NotTriviallyCopyable,
+ std::allocator<NotTriviallyCopyable>,
+ std::__split_buffer_size_layout> >::value,
+ "");
+static_assert(!std::__libcpp_is_trivially_relocatable<
+ std::__split_buffer<int, test_allocator<int>, std::__split_buffer_size_layout > >::value,
+ "");
// standard library types
// ----------------------
diff --git a/libcxx/test/libcxx/utilities/utility/has_default_three_way.compile.pass.cpp b/libcxx/test/libcxx/utilities/utility/has_default_three_way.compile.pass.cpp
new file mode 100644
index 0000000..42b4855
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/utility/has_default_three_way.compile.pass.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <__utility/default_three_way_comparator.h>
+#include <string>
+#include <vector>
+
+static_assert(std::__has_default_three_way_comparator<int, int>::value);
+static_assert(std::__has_default_three_way_comparator<int, long>::value);
+static_assert(std::__has_default_three_way_comparator<long, int>::value);
+static_assert(std::__has_default_three_way_comparator<long, long>::value);
+static_assert(std::__has_default_three_way_comparator<std::string, std::string>::value);
+
+#if __has_builtin(__builtin_lt_synthesises_from_spaceship)
+static_assert(std::__has_default_three_way_comparator<const std::string&, const std::string&>::value);
+static_assert(std::__has_default_three_way_comparator<const std::string&, const std::string_view&>::value);
+static_assert(std::__has_default_three_way_comparator<std::string, std::string_view>::value);
+static_assert(std::__has_default_three_way_comparator<const std::string&, const char*>::value);
+static_assert(std::__has_default_three_way_comparator<std::string, const char*>::value);
+static_assert(!std::__has_default_three_way_comparator<const std::string&, const wchar_t*>::value);
+
+static_assert(std::__has_default_three_way_comparator<const std::vector<int>&, const std::vector<int>&>::value);
+
+struct MyStruct {
+ int i;
+
+ friend auto operator<=>(MyStruct, MyStruct) = default;
+};
+
+static_assert(std::__has_default_three_way_comparator<const MyStruct&, const MyStruct&>::value);
+#endif
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
index d2efaf1..d15276b 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
@@ -13,8 +13,6 @@
// basic_fstream();
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include "test_macros.h"
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
index 1534878..95a04bd 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
@@ -13,8 +13,6 @@
// basic_fstream(basic_fstream&& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
index ca22624..2e0ebcd 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
@@ -18,8 +18,6 @@
// XFAIL: LIBCXX-AIX-FIXME
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
index 28cefc7..ca0921a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
@@ -13,8 +13,6 @@
// explicit basic_fstream(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
index 256380d..70d1efc 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
@@ -13,8 +13,6 @@
// basic_ifstream();
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include "test_macros.h"
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
index c8be388..81ec800 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
@@ -8,8 +8,6 @@
// FILE_DEPENDENCIES: test.dat
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
// <fstream>
// template <class charT, class traits = char_traits<charT> >
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
index 711ab2a..6bbe6f1 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
@@ -8,8 +8,6 @@
// FILE_DEPENDENCIES: test.dat
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
// <fstream>
// template <class charT, class traits = char_traits<charT> >
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
index d4bbb3c..e1a9b53 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
@@ -15,8 +15,6 @@
// explicit basic_ifstream(const string& s, ios_base::openmode mode = ios_base::in);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
index 4cda1db..a7b0918 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
@@ -13,8 +13,6 @@
// basic_ofstream();
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include "test_macros.h"
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
index 501a4c9..ec02fa2 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
@@ -13,8 +13,6 @@
// basic_ofstream(basic_ofstream&& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
index 1b5a55d..fbb03f1 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
@@ -18,8 +18,6 @@
// XFAIL: LIBCXX-AIX-FIXME
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
#include <ios>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
index bb18c88b..33a7e9b 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
@@ -13,8 +13,6 @@
// explicit basic_ofstream(const string& s, ios_base::openmode mode = ios_base::out);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <fstream>
#include <cassert>
#include <ios>
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp
index 7689221..d78f7df 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp
@@ -15,8 +15,6 @@
// basic_ios& copyfmt(const basic_ios& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <ios>
#include <memory>
#include <streambuf>
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
index 8cd23d4..8c73df4 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
@@ -15,8 +15,6 @@
// basic_istringstream() : basic_istringstream(ios_base::in) {} // C++20
// explicit basic_istringstream(ios_base::openmode which); // C++20
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
index 1af3304..00ac7cc 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
@@ -13,8 +13,6 @@
// basic_istringstream(basic_istringstream&& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
index 7755dd9..4a5965e7 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
@@ -14,8 +14,6 @@
// explicit basic_istringstream(const basic_string<charT,traits,allocator>& str,
// ios_base::openmode which = ios_base::in);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
index eb248a7..a6b98a4 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
@@ -15,8 +15,6 @@
// basic_ostringstream() : basic_ostringstream(ios_base::out) {} // C++20
// explicit basic_ostringstream(ios_base::openmode which); // C++20
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
index 62ece7b..596a3e7 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
@@ -13,8 +13,6 @@
// basic_ostringstream(basic_ostringstream&& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
index bbec8f7..9e9405a 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
@@ -14,8 +14,6 @@
// explicit basic_ostringstream(const basic_string<charT,traits,allocator>& str,
// ios_base::openmode which = ios_base::in);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp
index 0e535814..4f9e7e0 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp
@@ -15,8 +15,6 @@
// basic_stringstream() : basic_stringstream(ios_base::out | ios_base::in) {} // C++20
// explicit basic_stringstream(ios_base::openmode which); // C++20
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp
index e905f5f..0702d9a 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp
@@ -13,8 +13,6 @@
// basic_stringstream(basic_stringstream&& rhs);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp
index d4fe18a..0888087 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp
@@ -14,8 +14,6 @@
// explicit basic_stringstream(const basic_string<charT,traits,Allocator>& str,
// ios_base::openmode which = ios_base::out|ios_base::in);
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
#include <sstream>
#include <cassert>
diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile
index 79e1156..8e1c341 100644
--- a/libcxx/utils/ci/Dockerfile
+++ b/libcxx/utils/ci/Dockerfile
@@ -76,6 +76,9 @@ RUN sudo apt-get update \
&& sudo apt-get install -y \
tzdata
+# Install various tools used by the build or the test suite
+# TODO add ninja-build once 1.11 is available in Ubuntu, also remove the manual
+# installation below.
RUN sudo apt-get update \
&& sudo apt-get install -y \
bash \
@@ -108,9 +111,6 @@ RUN sudo apt-get update \
xz-utils \
&& sudo rm -rf /var/lib/apt/lists/*
-# Install various tools used by the build or the test suite
-#RUN apt-get update && apt-get install -y ninja-build python3 python3-distutils python3-psutil git gdb ccache
-# TODO add ninja-build once 1.11 is available in Ubuntu, also remove the manual installation.
RUN <<EOF
set -e
wget -qO /tmp/ninja.gz https://github.com/ninja-build/ninja/releases/latest/download/ninja-linux.zip
diff --git a/lldb/examples/synthetic/libcxx.py b/lldb/examples/synthetic/libcxx.py
index 5abeb30..549255e 100644
--- a/lldb/examples/synthetic/libcxx.py
+++ b/lldb/examples/synthetic/libcxx.py
@@ -1,3 +1,6 @@
+from enum import Enum
+from sys import stderr
+import sys
import lldb
import lldb.formatters.Logger
@@ -74,6 +77,59 @@ def stdstring_SummaryProvider(valobj, dict):
return '"' + strval + '"'
+def get_buffer_end(buffer, begin):
+ """
+ Returns a pointer to where the next element would be pushed.
+
+ For libc++'s stable ABI and unstable < LLVM 22, returns `__end_`.
+ For libc++'s unstable ABI, returns `__begin_ + __size_`.
+ """
+ map_end = buffer.GetChildMemberWithName("__end_")
+ if map_end.IsValid():
+ return map_end.GetValueAsUnsigned(0)
+ map_size = buffer.GetChildMemberWithName("__size_").GetValueAsUnsigned(0)
+ return begin + map_size
+
+
+def get_buffer_endcap(parent, buffer, begin, has_compressed_pair_layout, is_size_based):
+ """
+ Returns a pointer to the end of the buffer.
+
+ For libc++'s stable ABI and unstable < LLVM 22, returns:
+ * `__end_cap_`, if `__compressed_pair` is being used
+ * `__cap_`, otherwise
+ For libc++'s unstable ABI, returns `__begin_ + __cap_`.
+ """
+ if has_compressed_pair_layout:
+ map_endcap = parent._get_value_of_compressed_pair(
+ buffer.GetChildMemberWithName("__end_cap_")
+ )
+ elif buffer.GetType().GetNumberOfDirectBaseClasses() == 1:
+ # LLVM 22's __split_buffer is derived from a base class that describes its layout. When the
+ # compressed pair ABI is required, we also use an anonymous struct. Per [#158131], LLDB
+ # is unable to access members of an anonymous struct to a base class, through the derived
+ # class. This means that in order to access the compressed pair's pointer, we need to first
+ # get to its base class.
+ #
+ # [#158131]: https://github.com/llvm/llvm-project/issues/158131
+ buffer = buffer.GetChildAtIndex(0)
+ if is_size_based:
+ map_endcap = buffer.GetChildMemberWithName("__cap_")
+ else:
+ map_endcap = buffer.GetChildMemberWithName("__back_cap_")
+ map_endcap = map_endcap.GetValueAsUnsigned(0)
+ else:
+ map_endcap = buffer.GetChildMemberWithName("__cap_")
+ if not map_endcap.IsValid():
+ map_endcap = buffer.GetChildMemberWithName("__end_cap_")
+ map_endcap = map_endcap.GetValueAsUnsigned(0)
+
+ if is_size_based:
+ return begin + map_endcap
+
+ return map_endcap
+
+
class stdvector_SynthProvider:
def __init__(self, valobj, dict):
logger = lldb.formatters.Logger.Logger()
@@ -755,23 +811,21 @@ class stddeque_SynthProvider:
if self.block_size < 0:
logger.write("block_size < 0")
return
- map_ = self.valobj.GetChildMemberWithName("__map_")
start = self.valobj.GetChildMemberWithName("__start_").GetValueAsUnsigned(0)
+
+ map_ = self.valobj.GetChildMemberWithName("__map_")
+ is_size_based = map_.GetChildMemberWithName("__size_").IsValid()
first = map_.GetChildMemberWithName("__first_")
+ # LLVM 22 renames __map_.__begin_ to __map_.__front_cap_
+ if not first:
+ first = map_.GetChildMemberWithName("__front_cap_")
map_first = first.GetValueAsUnsigned(0)
self.map_begin = map_.GetChildMemberWithName("__begin_")
map_begin = self.map_begin.GetValueAsUnsigned(0)
- map_end = map_.GetChildMemberWithName("__end_").GetValueAsUnsigned(0)
-
- if has_compressed_pair_layout:
- map_endcap = self._get_value_of_compressed_pair(
- map_.GetChildMemberWithName("__end_cap_")
- )
- else:
- map_endcap = map_.GetChildMemberWithName("__cap_")
- if not map_endcap.IsValid():
- map_endcap = map_.GetChildMemberWithName("__end_cap_")
- map_endcap = map_endcap.GetValueAsUnsigned(0)
+ map_end = get_buffer_end(map_, map_begin)
+ map_endcap = get_buffer_endcap(
+ self, map_, map_begin, has_compressed_pair_layout, is_size_based
+ )
# check consistency
if not map_first <= map_begin <= map_end <= map_endcap:
diff --git a/lldb/include/lldb/Target/StackID.h b/lldb/include/lldb/Target/StackID.h
index c2a5d73..1846153 100644
--- a/lldb/include/lldb/Target/StackID.h
+++ b/lldb/include/lldb/Target/StackID.h
@@ -26,7 +26,11 @@ public:
lldb::addr_t GetPC() const { return m_pc; }
- lldb::addr_t GetCallFrameAddress() const { return m_cfa; }
+ lldb::addr_t GetCallFrameAddressWithMetadata() const {
+ return m_cfa_with_metadata;
+ }
+
+ lldb::addr_t GetCallFrameAddressWithoutMetadata() const { return m_cfa; }
SymbolContextScope *GetSymbolContextScope() const { return m_symbol_scope; }
@@ -62,6 +66,9 @@ protected:
/// below)
lldb::addr_t m_cfa = LLDB_INVALID_ADDRESS;
+ /// The cfa with metadata (i.e. prior to Process::FixAddress).
+ lldb::addr_t m_cfa_with_metadata = LLDB_INVALID_ADDRESS;
+
/// If nullptr, there is no block or symbol for this frame. If not nullptr,
/// this will either be the scope for the lexical block for the frame, or the
/// scope for the symbol. Symbol context scopes are always be unique pointers
diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h
index 96bd5e3..361108fd 100644
--- a/lldb/include/lldb/Utility/ArchSpec.h
+++ b/lldb/include/lldb/Utility/ArchSpec.h
@@ -327,6 +327,11 @@ public:
/// \return a boolean value.
bool IsMIPS() const;
+ /// If NVPTX architecture return true.
+ ///
+ /// \return a boolean value.
+ bool IsNVPTX() const;
+
/// Returns a string representing current architecture as a target CPU for
/// tools like compiler, disassembler etc.
///
diff --git a/lldb/scripts/framework-header-fix.py b/lldb/scripts/framework-header-fix.py
index 36c5c67..3447dfc 100755
--- a/lldb/scripts/framework-header-fix.py
+++ b/lldb/scripts/framework-header-fix.py
@@ -115,8 +115,10 @@ def main():
unifdef_guards = ["-U" + guard for guard in args.unifdef_guards]
# Create the framework's header dir if it doesn't already exist
- if not os.path.exists(os.path.dirname(output_file_path)):
+ try:
os.makedirs(os.path.dirname(output_file_path))
+ except FileExistsError:
+ pass
if framework_version == "lldb_main":
modify_main_includes(input_file_path, output_file_path)
diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp
index b6724bb..42dbed4 100644
--- a/lldb/source/API/SBFrame.cpp
+++ b/lldb/source/API/SBFrame.cpp
@@ -267,7 +267,7 @@ lldb::addr_t SBFrame::GetCFA() const {
}
if (StackFrame *frame = exe_ctx->GetFramePtr())
- return frame->GetStackID().GetCallFrameAddress();
+ return frame->GetStackID().GetCallFrameAddressWithoutMetadata();
return LLDB_INVALID_ADDRESS;
}
diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index 332cf2c..5040351 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -2195,7 +2195,7 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
// Note that we don't have to parse FDEs because this DWARF expression
// is commonly evaluated with a valid stack frame.
StackID id = frame->GetStackID();
- addr_t cfa = id.GetCallFrameAddress();
+ addr_t cfa = id.GetCallFrameAddressWithMetadata();
if (cfa != LLDB_INVALID_ADDRESS) {
stack.push_back(Scalar(cfa));
stack.back().SetValueType(Value::ValueType::LoadAddress);
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 277de8f..1f7b8d4 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -190,14 +190,16 @@ static bool IsTrivialBasename(const llvm::StringRef &basename) {
if (basename.size() <= idx)
return false; // Empty string or "~"
- if (!std::isalpha(basename[idx]) && basename[idx] != '_')
+ if (!std::isalpha(static_cast<unsigned char>(basename[idx])) &&
+ basename[idx] != '_')
return false; // First character (after removing the possible '~'') isn't in
// [A-Za-z_]
// Read all characters matching [A-Za-z_0-9]
++idx;
while (idx < basename.size()) {
- if (!std::isalnum(basename[idx]) && basename[idx] != '_')
+ if (!std::isalnum(static_cast<unsigned char>(basename[idx])) &&
+ basename[idx] != '_')
break;
++idx;
}
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
index 6053d04..141c5c9 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -40,8 +40,10 @@ using namespace lldb_private::formatters;
static void consumeInlineNamespace(llvm::StringRef &name) {
// Delete past an inline namespace, if any: __[a-zA-Z0-9_]+::
auto scratch = name;
- if (scratch.consume_front("__") && std::isalnum(scratch[0])) {
- scratch = scratch.drop_while([](char c) { return std::isalnum(c); });
+ if (scratch.consume_front("__") &&
+ std::isalnum(static_cast<unsigned char>(scratch[0]))) {
+ scratch = scratch.drop_while(
+ [](char c) { return std::isalnum(static_cast<unsigned char>(c)); });
if (scratch.consume_front("::")) {
// Successfully consumed a namespace.
name = scratch;
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 39aacdb..e4544cd 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -3962,8 +3962,6 @@ TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type,
return 0;
case clang::Type::DependentSizedExtVector:
return eTypeHasChildren | eTypeIsVector;
- case clang::Type::DependentTemplateSpecialization:
- return eTypeIsTemplate;
case clang::Type::Enum:
if (pointee_or_element_clang_type)
@@ -4237,8 +4235,6 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) {
break;
case clang::Type::DependentName:
break;
- case clang::Type::DependentTemplateSpecialization:
- break;
case clang::Type::PackExpansion:
break;
@@ -5108,7 +5104,6 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
- case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
@@ -5277,7 +5272,6 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) {
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
- case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
@@ -6171,8 +6165,6 @@ uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) {
return 0;
case clang::Type::DependentName:
return 1;
- case clang::Type::DependentTemplateSpecialization:
- return 1;
case clang::Type::ObjCObject:
return 0;
case clang::Type::ObjCInterface:
diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp
index 787eb94..3b018c0 100644
--- a/lldb/source/Target/RegisterContextUnwind.cpp
+++ b/lldb/source/Target/RegisterContextUnwind.cpp
@@ -2039,8 +2039,6 @@ bool RegisterContextUnwind::ReadFrameAddress(
reg_info, cfa_reg_contents, reg_info->byte_size, reg_value);
if (error.Success()) {
address = reg_value.GetAsUInt64();
- if (abi_sp)
- address = abi_sp->FixCodeAddress(address);
UnwindLogMsg(
"CFA value via dereferencing reg %s (%d): reg has val 0x%" PRIx64
", CFA value is 0x%" PRIx64,
@@ -2062,8 +2060,6 @@ bool RegisterContextUnwind::ReadFrameAddress(
RegisterNumber cfa_reg(m_thread, row_register_kind,
fa.GetRegisterNumber());
if (ReadGPRValue(cfa_reg, cfa_reg_contents)) {
- if (abi_sp)
- cfa_reg_contents = abi_sp->FixDataAddress(cfa_reg_contents);
if (cfa_reg_contents == LLDB_INVALID_ADDRESS || cfa_reg_contents == 0 ||
cfa_reg_contents == 1) {
UnwindLogMsg(
@@ -2100,9 +2096,6 @@ bool RegisterContextUnwind::ReadFrameAddress(
dwarfexpr.Evaluate(&exe_ctx, this, 0, nullptr, nullptr);
if (result) {
address = result->GetScalar().ULongLong();
- if (ABISP abi_sp = m_thread.GetProcess()->GetABI())
- address = abi_sp->FixCodeAddress(address);
-
UnwindLogMsg("CFA value set by DWARF expression is 0x%" PRIx64,
address);
return true;
@@ -2143,7 +2136,6 @@ bool RegisterContextUnwind::ReadFrameAddress(
}
case UnwindPlan::Row::FAValue::isConstant: {
address = fa.GetConstant();
- address = m_thread.GetProcess()->FixDataAddress(address);
UnwindLogMsg("CFA value set by constant is 0x%" PRIx64, address);
return true;
}
diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp
index fa5d159..ccf874f 100644
--- a/lldb/source/Target/StackFrameList.cpp
+++ b/lldb/source/Target/StackFrameList.cpp
@@ -449,7 +449,7 @@ bool StackFrameList::FetchFramesUpTo(uint32_t end_idx,
}
} else {
unwind_frame_sp = m_frames.front();
- cfa = unwind_frame_sp->m_id.GetCallFrameAddress();
+ cfa = unwind_frame_sp->m_id.GetCallFrameAddressWithoutMetadata();
}
} else {
// Check for interruption when building the frames.
diff --git a/lldb/source/Target/StackID.cpp b/lldb/source/Target/StackID.cpp
index f879276..137c776 100644
--- a/lldb/source/Target/StackID.cpp
+++ b/lldb/source/Target/StackID.cpp
@@ -17,7 +17,8 @@ using namespace lldb_private;
StackID::StackID(lldb::addr_t pc, lldb::addr_t cfa,
SymbolContextScope *symbol_scope, Process *process)
- : m_pc(pc), m_cfa(cfa), m_symbol_scope(symbol_scope) {
+ : m_pc(pc), m_cfa(cfa), m_cfa_with_metadata(cfa),
+ m_symbol_scope(symbol_scope) {
if (process) {
m_pc = process->FixCodeAddress(m_pc);
m_cfa = process->FixDataAddress(m_cfa);
@@ -29,6 +30,7 @@ void StackID::SetPC(lldb::addr_t pc, Process *process) {
}
void StackID::SetCFA(lldb::addr_t cfa, Process *process) {
+ m_cfa_with_metadata = cfa;
m_cfa = process ? process->FixDataAddress(cfa) : cfa;
}
@@ -49,7 +51,8 @@ void StackID::Dump(Stream *s) {
}
bool lldb_private::operator==(const StackID &lhs, const StackID &rhs) {
- if (lhs.GetCallFrameAddress() != rhs.GetCallFrameAddress())
+ if (lhs.GetCallFrameAddressWithoutMetadata() !=
+ rhs.GetCallFrameAddressWithoutMetadata())
return false;
SymbolContextScope *lhs_scope = lhs.GetSymbolContextScope();
@@ -67,8 +70,8 @@ bool lldb_private::operator!=(const StackID &lhs, const StackID &rhs) {
}
bool lldb_private::operator<(const StackID &lhs, const StackID &rhs) {
- const lldb::addr_t lhs_cfa = lhs.GetCallFrameAddress();
- const lldb::addr_t rhs_cfa = rhs.GetCallFrameAddress();
+ const lldb::addr_t lhs_cfa = lhs.GetCallFrameAddressWithoutMetadata();
+ const lldb::addr_t rhs_cfa = rhs.GetCallFrameAddressWithoutMetadata();
// FIXME: We are assuming that the stacks grow downward in memory. That's not
// necessary, but true on
diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp
index 1b8dae3..2a87cc6 100644
--- a/lldb/source/Utility/ArchSpec.cpp
+++ b/lldb/source/Utility/ArchSpec.cpp
@@ -545,6 +545,8 @@ const char *ArchSpec::GetArchitectureName() const {
bool ArchSpec::IsMIPS() const { return GetTriple().isMIPS(); }
+bool ArchSpec::IsNVPTX() const { return GetTriple().isNVPTX(); }
+
std::string ArchSpec::GetTargetABI() const {
std::string abi;
diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp
index 7fbe46d..c8766bd 100644
--- a/lldb/source/Utility/Scalar.cpp
+++ b/lldb/source/Utility/Scalar.cpp
@@ -565,12 +565,13 @@ const Scalar lldb_private::operator-(Scalar lhs, Scalar rhs) {
const Scalar lldb_private::operator/(Scalar lhs, Scalar rhs) {
Scalar result;
- if ((result.m_type = Scalar::PromoteToMaxType(lhs, rhs)) != Scalar::e_void &&
- !rhs.IsZero()) {
+ if ((result.m_type = Scalar::PromoteToMaxType(lhs, rhs)) != Scalar::e_void) {
switch (result.m_type) {
case Scalar::e_void:
break;
case Scalar::e_int:
+ if (rhs.IsZero())
+ break;
result.m_integer = lhs.m_integer / rhs.m_integer;
return result;
case Scalar::e_float:
diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile
new file mode 100644
index 0000000..f0de8ff
--- /dev/null
+++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile
@@ -0,0 +1,11 @@
+ASM_SOURCES := main.s
+
+# This is to appease Makefile.rules, there is no main.c
+C_SOURCES := main.c
+
+ASM_OBJS := $(ASM_SOURCES:.s=.o)
+
+%.o: %.s
+ $(CC) -c -x assembler $< -o $@
+
+include Makefile.rules
diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py
new file mode 100644
index 0000000..839e0e1
--- /dev/null
+++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py
@@ -0,0 +1,35 @@
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+@skipUnlessDarwin
+@skipIf(archs=no_match(["arm64"]))
+class TestArmPointerMetadataStripping(TestBase):
+ def test(self):
+ self.build()
+ target, process, thread, bkpt = lldbutil.run_to_name_breakpoint(self, "foo")
+
+ # Step over the first two instructions of foo in order to
+ # toggle the bit of fp and save it on the stack:
+ # orr x29, x29, #0x1000000000000000
+ # stp x29, x30, [sp, #-16]!
+ # This is effectively adding metadata to the CFA of the caller frame (main).
+ thread.StepInstruction(False)
+ thread.StepInstruction(False)
+
+ # The location of `argv` has been artificially made equal to the CFA of the frame.
+ # As such, it should have the metadata artificially set previously.
+ argv_addr = thread.frames[1].GetValueForVariablePath("&argv")
+ self.assertTrue(argv_addr.IsValid())
+ argv_addr_uint = argv_addr.GetValueAsUnsigned()
+ self.assertNotEqual((argv_addr_uint & (1 << 60)), 0)
+
+ # GetCFA strips metadata.
+ cfa = thread.frames[1].GetCFA()
+ self.assertEqual((cfa & (1 << 60)), 0)
+
+ # If the test worked correctly, the cfa and the location should be identical,
+ # modulo the metadata.
+ self.assertEqual(cfa | (1 << 60), argv_addr_uint)
diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s
new file mode 100644
index 0000000..0825c5d
--- /dev/null
+++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s
@@ -0,0 +1,226 @@
+; The assembly below corresponds to this program:
+; __attribute__((nodebug))
+; int foo() {
+; return 10;
+; }
+; int main(int argc, char **argv) {
+; foo();
+; return 0;
+; }
+;
+; The assembly was edited in two places (search for "EDIT"):
+; 1. A "orr x29, x29, #0x1000000000000000" instruction was added in foo. This
+; effectively changes the CFA value of the frame above foo (i.e. main).
+; 2. In main, the DWARF location of `argv` was changed to DW_AT_call_frame_cfa.
+;
+; This allows us to stop in foo, go to frame 1 (main) and do `v &argv`,
+; obtaining the result of evaluating DW_AT_call_frame_cfa.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .globl _foo ; -- Begin function foo
+ .p2align 2
+_foo: ; @foo
+Lfunc_begin0:
+ .cfi_startproc
+	orr x29, x29, #0x1000000000000000 ; EDIT: Set bit 60 of fp.
+ stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ mov w0, #10 ; =0xa
+ ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+ ret
+Lfunc_end0:
+ .cfi_endproc
+ ; -- End function
+ .globl _main ; -- Begin function main
+ .p2align 2
+_main: ; @main
+Lfunc_begin1:
+ .file 1 "/test" "test.c"
+ .loc 1 6 0 ; test.c:6:0
+ .cfi_startproc
+ sub sp, sp, #48
+ stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+ add x29, sp, #32
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ mov w8, #0 ; =0x0
+ str w8, [sp, #12] ; 4-byte Folded Spill
+ stur wzr, [x29, #-4]
+ stur w0, [x29, #-8]
+ str x1, [sp, #16]
+Ltmp0:
+ bl _foo
+ ldr w0, [sp, #12] ; 4-byte Folded Reload
+ ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+ add sp, sp, #48
+ ret
+Ltmp1:
+Lfunc_end1:
+ .cfi_endproc
+ ; -- End function
+ .section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+ .byte 1 ; Abbreviation Code
+ .byte 17 ; DW_TAG_compile_unit
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 37 ; DW_AT_producer
+ .byte 14 ; DW_FORM_strp
+ .byte 19 ; DW_AT_language
+ .byte 5 ; DW_FORM_data2
+ .byte 3 ; DW_AT_name
+ .byte 14 ; DW_FORM_strp
+ .ascii "\202|" ; DW_AT_LLVM_sysroot
+ .byte 14 ; DW_FORM_strp
+ .ascii "\357\177" ; DW_AT_APPLE_sdk
+ .byte 14 ; DW_FORM_strp
+ .byte 16 ; DW_AT_stmt_list
+ .byte 23 ; DW_FORM_sec_offset
+ .byte 27 ; DW_AT_comp_dir
+ .byte 14 ; DW_FORM_strp
+ .byte 17 ; DW_AT_low_pc
+ .byte 1 ; DW_FORM_addr
+ .byte 18 ; DW_AT_high_pc
+ .byte 6 ; DW_FORM_data4
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 2 ; Abbreviation Code
+ .byte 46 ; DW_TAG_subprogram
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 17 ; DW_AT_low_pc
+ .byte 1 ; DW_FORM_addr
+ .byte 18 ; DW_AT_high_pc
+ .byte 6 ; DW_FORM_data4
+ .byte 64 ; DW_AT_frame_base
+ .byte 24 ; DW_FORM_exprloc
+ .byte 3 ; DW_AT_name
+ .byte 14 ; DW_FORM_strp
+ .byte 58 ; DW_AT_decl_file
+ .byte 11 ; DW_FORM_data1
+ .byte 59 ; DW_AT_decl_line
+ .byte 11 ; DW_FORM_data1
+ .byte 39 ; DW_AT_prototyped
+ .byte 25 ; DW_FORM_flag_present
+ .byte 73 ; DW_AT_type
+ .byte 19 ; DW_FORM_ref4
+ .byte 63 ; DW_AT_external
+ .byte 25 ; DW_FORM_flag_present
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 3 ; Abbreviation Code
+ .byte 5 ; DW_TAG_formal_parameter
+ .byte 0 ; DW_CHILDREN_no
+ .byte 2 ; DW_AT_location
+ .byte 24 ; DW_FORM_exprloc
+ .byte 3 ; DW_AT_name
+ .byte 14 ; DW_FORM_strp
+ .byte 58 ; DW_AT_decl_file
+ .byte 11 ; DW_FORM_data1
+ .byte 59 ; DW_AT_decl_line
+ .byte 11 ; DW_FORM_data1
+ .byte 73 ; DW_AT_type
+ .byte 19 ; DW_FORM_ref4
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 4 ; Abbreviation Code
+ .byte 36 ; DW_TAG_base_type
+ .byte 0 ; DW_CHILDREN_no
+ .byte 3 ; DW_AT_name
+ .byte 14 ; DW_FORM_strp
+ .byte 62 ; DW_AT_encoding
+ .byte 11 ; DW_FORM_data1
+ .byte 11 ; DW_AT_byte_size
+ .byte 11 ; DW_FORM_data1
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 5 ; Abbreviation Code
+ .byte 15 ; DW_TAG_pointer_type
+ .byte 0 ; DW_CHILDREN_no
+ .byte 73 ; DW_AT_type
+ .byte 19 ; DW_FORM_ref4
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 0 ; EOM(3)
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+Lcu_begin0:
+.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit
+ .long Lset0
+Ldebug_info_start0:
+ .short 4 ; DWARF version number
+.set Lset1, Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. Section
+ .long Lset1
+ .byte 8 ; Address Size (in bytes)
+ .byte 1 ; Abbrev [1] 0xb:0x76 DW_TAG_compile_unit
+ .long 0 ; DW_AT_producer
+ .short 12 ; DW_AT_language
+ .long 47 ; DW_AT_name
+ .long 54 ; DW_AT_LLVM_sysroot
+ .long 165 ; DW_AT_APPLE_sdk
+.set Lset2, Lline_table_start0-Lsection_line ; DW_AT_stmt_list
+ .long Lset2
+ .long 180 ; DW_AT_comp_dir
+ .quad Lfunc_begin1 ; DW_AT_low_pc
+.set Lset3, Lfunc_end1-Lfunc_begin1 ; DW_AT_high_pc
+ .long Lset3
+ .byte 2 ; Abbrev [2] 0x32:0x36 DW_TAG_subprogram
+ .quad Lfunc_begin1 ; DW_AT_low_pc
+.set Lset4, Lfunc_end1-Lfunc_begin1 ; DW_AT_high_pc
+ .long Lset4
+ .byte 1 ; DW_AT_frame_base
+ .byte 109
+ .long 247 ; DW_AT_name
+ .byte 1 ; DW_AT_decl_file
+ .byte 6 ; DW_AT_decl_line
+ ; DW_AT_prototyped
+ .long 107 ; DW_AT_type
+ ; DW_AT_external
+ .byte 3 ; Abbrev [3] 0x4b:0xe DW_TAG_formal_parameter
+ .byte 2 ; DW_AT_location
+ .byte 145
+ .byte 120
+ .long 256 ; DW_AT_name
+ .byte 1 ; DW_AT_decl_file
+ .byte 6 ; DW_AT_decl_line
+ .long 103 ; DW_AT_type
+ .byte 3 ; Abbrev [3] 0x59:0xe DW_TAG_formal_parameter
+ .byte 1 ; DW_AT_location
+ .byte 0x9c ; EDIT: DW_AT_call_frame_cfa
+ .long 261 ; DW_AT_name
+ .byte 1 ; DW_AT_decl_file
+ .byte 6 ; DW_AT_decl_line
+ .long 110 ; DW_AT_type
+ .byte 0 ; End Of Children Mark
+ .byte 4 ; Abbrev [4] 0x68:0x7 DW_TAG_base_type
+ .long 252 ; DW_AT_name
+ .byte 5 ; DW_AT_encoding
+ .byte 4 ; DW_AT_byte_size
+ .byte 5 ; Abbrev [5] 0x6f:0x5 DW_TAG_pointer_type
+ .long 115 ; DW_AT_type
+ .byte 5 ; Abbrev [5] 0x74:0x5 DW_TAG_pointer_type
+ .long 120 ; DW_AT_type
+ .byte 4 ; Abbrev [4] 0x79:0x7 DW_TAG_base_type
+ .long 266 ; DW_AT_name
+ .byte 6 ; DW_AT_encoding
+ .byte 1 ; DW_AT_byte_size
+ .byte 0 ; End Of Children Mark
+Ldebug_info_end0:
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "Apple clang " ; string offset=0
+ .asciz "test.c" ; string offset=47
+ .asciz "/Applications/Xcode..........................................................................................." ; string offset=54
+ .asciz ".............." ; string offset=165
+ .asciz "......................................................../llvm_src1" ; string offset=180
+ .asciz "main" ; string offset=247
+ .asciz "int" ; string offset=252
+ .asciz "argc" ; string offset=256
+ .asciz "argv" ; string offset=261
+ .asciz "char" ; string offset=266
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
index d7d25ca..d3952e1 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
@@ -56,6 +56,7 @@ class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
self.set_and_hit_breakpoint(continueToExit=True)
@skipIfNetBSD # Hangs on NetBSD as well
+ @skipIfWindows # https://github.com/llvm/llvm-project/issues/137660
def test_by_name(self):
"""
Tests attaching to a process by process name.
diff --git a/lldb/unittests/Utility/ScalarTest.cpp b/lldb/unittests/Utility/ScalarTest.cpp
index 256d456..6d5caef 100644
--- a/lldb/unittests/Utility/ScalarTest.cpp
+++ b/lldb/unittests/Utility/ScalarTest.cpp
@@ -337,6 +337,12 @@ TEST(ScalarTest, Division) {
Scalar r = lhs / rhs;
EXPECT_TRUE(r.IsValid());
EXPECT_EQ(r, Scalar(2.5));
+
+ Scalar inf = Scalar(1) / Scalar(0.0f);
+ Scalar int0 = Scalar(1) / Scalar(0);
+ Scalar ref_inf = llvm::APFloat::getInf(llvm::APFloat::IEEEsingle());
+ EXPECT_EQ(inf, ref_inf);
+ EXPECT_FALSE(int0.IsValid());
}
TEST(ScalarTest, Promotion) {
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 3756320..cef87e0 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1812,6 +1812,13 @@ The AMDGPU backend supports the following LLVM IR attributes.
offset by one less than the number of dynamic VGPR blocks required
by the function encoded in bits 5..3.
+ "amdgpu-cluster-dims"="x,y,z" Specify the cluster workgroup dimensions. A value of "0,0,0" indicates that
+ cluster is disabled. A value of "1024,1024,1024" indicates that cluster is enabled,
+ but the dimensions cannot be determined at compile time. Any other value explicitly
+ specifies the cluster dimensions.
+
+ This is only relevant on targets with cluster support.
+
================================================ ==========================================================
Calling Conventions
diff --git a/llvm/docs/ConvergentOperations.rst b/llvm/docs/ConvergentOperations.rst
index 5081eff..cdd3e89 100644
--- a/llvm/docs/ConvergentOperations.rst
+++ b/llvm/docs/ConvergentOperations.rst
@@ -13,7 +13,7 @@ Some parallel execution environments execute threads in groups that allow
efficient communication within the group using special primitives called
*convergent* operations. The outcome of a convergent operation is sensitive to
the set of threads that executes it "together", i.e., convergently. When control
-flow :ref:`diverges <convergence-and-uniformity>`, i.e. threads of the same
+flow :ref:`diverges <convergence-and-uniformity>`, i.e., threads of the same
group follow different
paths through the CFG, not all threads of the group may be available to
participate in this communication. This is the defining characteristic that
@@ -41,7 +41,7 @@ In structured programming languages, there is often an intuitive and
unambiguous way of determining the threads that are expected to communicate.
However, this is not always the case even in structured programming languages,
and the intuition breaks down entirely in unstructured control flow. This
-document describes the formal semantics in LLVM, i.e. how to determine the set
+document describes the formal semantics in LLVM, i.e., how to determine the set
of communicating threads for convergent operations.
The definitions in this document leave many details open, such as how groups of
@@ -449,15 +449,15 @@ Consider the following example:
// E
}
-In this program, the call to convergent_op() is lexically "inside" the ``for``
+In this program, the call to ``convergent_op()`` is lexically "inside" the ``for``
loop. But when translated to LLVM IR, the basic block B is an exiting block
ending in a divergent branch, and the basic block C is an exit of the loop.
-Thus, the call to convergent_op() is outside the loop. This causes a mismatch
+Thus, the call to ``convergent_op()`` is outside the loop. This causes a mismatch
between the programmer's expectation and the compiled program. The call should
be executed convergently on every iteration of the loop, by threads that
together take the branch to exit the loop. But when compiled, all threads that
take the divergent exit on different iterations first converge at the beginning
-of basic block C and then together execute the call to convergent_op().
+of basic block C and then together execute the call to ``convergent_op()``.
In this case, :ref:`llvm.experimental.convergence.loop
<llvm.experimental.convergence.loop>` can be used to express the desired
@@ -588,18 +588,18 @@ indirectly.
token @llvm.experimental.convergence.entry() convergent readnone
-This intrinsic is used to tie the dynamic instances inside of a function to
+This intrinsic is used to tie the dynamic instances inside a function to
those in the caller.
1. If the function is called from outside the scope of LLVM, the convergence of
- dynamic instances of this intrinsic are environment-defined. For example:
+ dynamic instances of this intrinsic is environment-defined. For example:
a. In an OpenCL *kernel launch*, the maximal set of threads that
can communicate outside the memory model is a *workgroup*.
Hence, a suitable choice is to specify that all the threads from
a single workgroup in OpenCL execute converged dynamic instances
of this intrinsic.
- b. In a C/C++ program, threads are launched independently and they can
+ b. In a C/C++ program, threads are launched independently and can
communicate only through the memory model. Hence the dynamic instances of
this intrinsic in a C/C++ program are never converged.
2. If the function is called from a call-site in LLVM IR, then two
@@ -701,7 +701,7 @@ convergent operation in the same basic block.
token @llvm.experimental.convergence.anchor() convergent readnone
-This intrinsic produces an initial convergence token that is independent from
+This intrinsic produces an initial convergence token that is independent of
any "outer scope". The set of threads executing converged dynamic instances of
this intrinsic is implementation-defined.
@@ -1483,7 +1483,7 @@ There is no guarantee about the value of ``%id`` in the threads where
hoisting ``@subgroupShuffle`` might introduce UB.
On the other hand, if ``@subgroupShuffle`` is defined such that it merely
-produces an undefined value or poison as result when ``%id`` is "out of range",
+produces an undefined value or poison as a result when ``%id`` is "out of range",
then speculating is okay.
Even though
@@ -1502,7 +1502,7 @@ Assuming that ``%tok`` is only used inside the conditional block, the anchor can
be sunk. The rationale is two-fold. First, the anchor has implementation-defined
behavior, and the sinking is part of the implementation. Second, already in the
original program, the set of threads that communicates in the
-``@convergent.operation`` is automatically subset to the threads for which
+``@convergent.operation`` is automatically a subset of the threads for which
``condition`` is true.
Anchors can be hoisted in acyclic control flow. For example:
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index d6c7b46..cfe090e 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -327,6 +327,9 @@ The primary goal of experimental support is to assist in the process of ratifica
``experimental-zalasr``
LLVM implements the `0.0.5 draft specification <https://github.com/mehnadnerd/riscv-zalasr>`__.
+``experimental-zibi``
+ LLVM implements the `0.1 release specification <https://github.com/riscv/zibi/releases/tag/v0.1.0>`__.
+
``experimental-zicfilp``, ``experimental-zicfiss``
LLVM implements the `1.0 release specification <https://github.com/riscv/riscv-cfi/releases/tag/v1.0>`__.
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 1617455..3c37993 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -124,6 +124,7 @@ Changes to the RISC-V Backend
using `$x` with an architecture string suffix is not yet supported.
* Ssctr and Smctr extensions are no longer experimental.
* Add support for Zvfbfa (Additional BF16 vector compute support)
+* Adds experimental support for the `Zibi` (Branch with Immediate) extension.
Changes to the WebAssembly Backend
----------------------------------
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index 9cfc658..e48e35d 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -173,8 +173,7 @@ struct PointerIntPairInfo {
"PointerIntPair with integer size too large for pointer");
enum MaskAndShiftConstants : uintptr_t {
/// PointerBitMask - The bits that come from the pointer.
- PointerBitMask =
- ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),
+ PointerBitMask = (~(uintptr_t)0) << PtrTraits::NumLowBitsAvailable,
/// IntShift - The number of low bits that we reserve for other uses, and
/// keep zero.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index a5e98bb..a6f4e51 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1847,6 +1847,10 @@ public:
/// otherwise scalar epilogue loop.
LLVM_ABI bool preferEpilogueVectorization() const;
+ /// \returns True if the loop vectorizer should discard any VFs where the
+ /// maximum register pressure exceeds getNumberOfRegisters.
+ LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;
+
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b58386b..566e1cf 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1105,6 +1105,8 @@ public:
virtual bool preferEpilogueVectorization() const { return true; }
+ virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
+
virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
return true;
}
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 087affcf..6a624a7 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -135,10 +135,9 @@ public:
/// Given a machine instruction descriptor, returns the register
/// class constraint for OpNum, or NULL.
- virtual
- const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const;
+ virtual const TargetRegisterClass *
+ getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 73ccc8e..3f576b2 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -883,7 +883,7 @@ public:
/// If a target supports multiple different pointer register classes,
/// kind specifies which one is indicated.
virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const {
+ getPointerRegClass(unsigned Kind = 0) const {
llvm_unreachable("Target didn't implement getPointerRegClass!");
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index 5690585..1ed23ee 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1046,6 +1046,12 @@ struct ReleaseT {
using EmptyTrait = std::true_type;
};
+// [6.0:440-441] `replayable` clause
+template <typename T, typename I, typename E> //
+struct ReplayableT {
+ using IncompleteTrait = std::true_type;
+};
+
// V5.2: [8.2.1] `requirement` clauses
template <typename T, typename I, typename E> //
struct ReverseOffloadT {
@@ -1153,6 +1159,12 @@ struct ToT {
std::tuple<OPT(Expectation), OPT(Mappers), OPT(Iterator), LocatorList> t;
};
+// [6.0:440-441] `transparent` clause
+template <typename T, typename I, typename E> //
+struct TransparentT {
+ using IncompleteTrait = std::true_type;
+};
+
// V5.2: [8.2.1] `requirement` clauses
template <typename T, typename I, typename E> //
struct UnifiedAddressT {
@@ -1279,7 +1291,8 @@ using EmptyClausesT = std::variant<
template <typename T, typename I, typename E>
using IncompleteClausesT =
std::variant<AdjustArgsT<T, I, E>, AppendArgsT<T, I, E>, MatchT<T, I, E>,
- OtherwiseT<T, I, E>, WhenT<T, I, E>>;
+ OtherwiseT<T, I, E>, ReplayableT<T, I, E>,
+ TransparentT<T, I, E>, WhenT<T, I, E>>;
template <typename T, typename I, typename E>
using TupleClausesT =
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index ce13619..6a41c24 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -456,6 +456,10 @@ def OMPC_Relaxed : Clause<[Spelling<"relaxed">]> {
def OMPC_Release : Clause<[Spelling<"release">]> {
let clangClass = "OMPReleaseClause";
}
+def OMPC_Replayable : Clause<[Spelling<"replayable">]> {
+ let flangClass = "OmpReplayableClause";
+ let isValueOptional = true;
+}
def OMPC_ReverseOffload : Clause<[Spelling<"reverse_offload">]> {
let clangClass = "OMPReverseOffloadClause";
}
@@ -523,6 +527,10 @@ def OMPC_To : Clause<[Spelling<"to">]> {
let clangClass = "OMPToClause";
let flangClass = "OmpToClause";
}
+def OMPC_Transparent : Clause<[Spelling<"transparent">]> {
+ let flangClass = "OmpTransparentClause";
+ let isValueOptional = true;
+}
def OMPC_UnifiedAddress : Clause<[Spelling<"unified_address">]> {
let clangClass = "OMPUnifiedAddressClause";
}
@@ -1128,6 +1136,7 @@ def OMP_Target : Directive<[Spelling<"target">]> {
VersionedClause<OMPC_NoWait>,
VersionedClause<OMPC_OMPX_Bare>,
VersionedClause<OMPC_OMPX_DynCGroupMem>,
+ VersionedClause<OMPC_Replayable, 60>,
VersionedClause<OMPC_ThreadLimit, 51>,
];
let association = AS_Block;
@@ -1139,6 +1148,7 @@ def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>,
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Default, 60>,
+ VersionedClause<OMPC_Transparent, 60>,
];
let requiredClauses = [
VersionedClause<OMPC_Map>,
@@ -1157,6 +1167,7 @@ def OMP_TargetEnterData : Directive<[Spelling<"target enter data", 1, 52>,
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Replayable, 60>,
];
let requiredClauses = [
VersionedClause<OMPC_Map>,
@@ -1173,6 +1184,7 @@ def OMP_TargetExitData : Directive<[Spelling<"target exit data", 1, 52>,
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Replayable, 60>,
];
let requiredClauses = [
VersionedClause<OMPC_Map>,
@@ -1191,6 +1203,7 @@ def OMP_TargetUpdate : Directive<[Spelling<"target update", 1, 52>,
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Replayable, 60>,
];
let association = AS_None;
let category = CA_Executable;
@@ -1213,6 +1226,8 @@ def OMP_Task : Directive<[Spelling<"task">]> {
VersionedClause<OMPC_Final>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_Replayable, 60>,
+ VersionedClause<OMPC_Transparent, 60>,
];
let association = AS_Block;
let category = CA_Executable;
@@ -1254,6 +1269,8 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> {
VersionedClause<OMPC_Final>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_Replayable, 60>,
+ VersionedClause<OMPC_Transparent, 60>,
];
let allowedExclusiveClauses = [
VersionedClause<OMPC_GrainSize>,
@@ -1267,6 +1284,9 @@ def OMP_TaskWait : Directive<[Spelling<"taskwait">]> {
VersionedClause<OMPC_Depend, 50>,
VersionedClause<OMPC_NoWait, 51>,
];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Replayable, 60>,
+ ];
let association = AS_None;
let category = CA_Executable;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6d53bf8..7c9aef5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3147,13 +3147,8 @@ let TargetPrefix = "aarch64" in {
// Counting elements
//
- class AdvSIMD_SME_CNTSB_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
-
- def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic;
- def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic;
- def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic;
- def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic;
+ def int_aarch64_sme_cntsd
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
//
// PSTATE Functions
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index cb9bd5c..e3f44a0 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -279,6 +279,9 @@ public:
/// Res to the identifier contents.
virtual bool parseIdentifier(StringRef &Res) = 0;
+ /// Parse identifier and get or create symbol for it.
+ bool parseSymbol(MCSymbol *&Res);
+
/// Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
index 359b277..62bfee7 100644
--- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
+++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
@@ -92,7 +92,7 @@ struct RootDescriptorYaml {
};
struct DescriptorRangeYaml {
- uint32_t RangeType;
+ dxil::ResourceClass RangeType;
uint32_t NumDescriptors;
uint32_t BaseShaderRegister;
uint32_t RegisterSpace;
@@ -111,12 +111,12 @@ struct DescriptorTableYaml {
};
struct RootParameterHeaderYaml {
- uint32_t Type;
- uint32_t Visibility;
+ dxbc::RootParameterType Type;
+ dxbc::ShaderVisibility Visibility;
uint32_t Offset;
RootParameterHeaderYaml(){};
- RootParameterHeaderYaml(uint32_t T) : Type(T) {}
+ RootParameterHeaderYaml(dxbc::RootParameterType T) : Type(T) {}
};
struct RootParameterLocationYaml {
@@ -165,21 +165,19 @@ struct RootParameterYamlDesc {
};
struct StaticSamplerYamlDesc {
- uint32_t Filter = llvm::to_underlying(dxbc::SamplerFilter::Anisotropic);
- uint32_t AddressU = llvm::to_underlying(dxbc::TextureAddressMode::Wrap);
- uint32_t AddressV = llvm::to_underlying(dxbc::TextureAddressMode::Wrap);
- uint32_t AddressW = llvm::to_underlying(dxbc::TextureAddressMode::Wrap);
+ dxbc::SamplerFilter Filter = dxbc::SamplerFilter::Anisotropic;
+ dxbc::TextureAddressMode AddressU = dxbc::TextureAddressMode::Wrap;
+ dxbc::TextureAddressMode AddressV = dxbc::TextureAddressMode::Wrap;
+ dxbc::TextureAddressMode AddressW = dxbc::TextureAddressMode::Wrap;
float MipLODBias = 0.f;
uint32_t MaxAnisotropy = 16u;
- uint32_t ComparisonFunc =
- llvm::to_underlying(dxbc::ComparisonFunc::LessEqual);
- uint32_t BorderColor =
- llvm::to_underlying(dxbc::StaticBorderColor::OpaqueWhite);
+ dxbc::ComparisonFunc ComparisonFunc = dxbc::ComparisonFunc::LessEqual;
+ dxbc::StaticBorderColor BorderColor = dxbc::StaticBorderColor::OpaqueWhite;
float MinLOD = 0.f;
float MaxLOD = std::numeric_limits<float>::max();
uint32_t ShaderRegister;
uint32_t RegisterSpace;
- uint32_t ShaderVisibility;
+ dxbc::ShaderVisibility ShaderVisibility;
};
struct RootSignatureYamlDesc {
@@ -321,6 +319,13 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ResourceKind)
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::D3DSystemValue)
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigComponentType)
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigMinPrecision)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::RootParameterType)
+LLVM_YAML_DECLARE_ENUM_TRAITS(dxil::ResourceClass)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SamplerFilter)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::StaticBorderColor)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::TextureAddressMode)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::ShaderVisibility)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::ComparisonFunc)
namespace llvm {
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index a626071..c0e5d2d 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -62,7 +62,7 @@ enum class sampleprof_error {
uncompress_failed,
zlib_unavailable,
hash_mismatch,
- illegal_line_offset
+ illegal_line_offset,
};
inline std::error_code make_error_code(sampleprof_error E) {
@@ -91,6 +91,8 @@ struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
namespace llvm {
namespace sampleprof {
+constexpr char kVTableProfPrefix[] = "vtables ";
+
enum SampleProfileFormat {
SPF_None = 0,
SPF_Text = 0x1,
@@ -204,6 +206,9 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagIsPreInlined means this profile contains ShouldBeInlined
/// contexts thus this is CS preinliner computed.
SecFlagIsPreInlined = (1 << 4),
+
+ /// SecFlagHasVTableTypeProf means this profile contains vtable type profiles.
+ SecFlagHasVTableTypeProf = (1 << 5),
};
enum class SecFuncMetadataFlags : uint32_t {
@@ -303,7 +308,7 @@ struct LineLocation {
}
uint64_t getHashCode() const {
- return ((uint64_t) Discriminator << 32) | LineOffset;
+ return ((uint64_t)Discriminator << 32) | LineOffset;
}
uint32_t LineOffset;
@@ -318,16 +323,30 @@ struct LineLocationHash {
LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
+/// The key represents the type of a C++ polymorphic class (identified by its
+/// vtable), and the value represents its counter.
+/// TODO: The class name FunctionId should be renamed to SymbolId in a refactor
+/// change.
+using TypeCountMap = std::map<FunctionId, uint64_t>;
+
+/// Write \p Map to the output stream. Keys are linearized using \p NameTable
+/// and written as ULEB128. Values are written as ULEB128 as well.
+std::error_code
+serializeTypeMap(const TypeCountMap &Map,
+ const MapVector<FunctionId, uint32_t> &NameTable,
+ raw_ostream &OS);
+
/// Representation of a single sample record.
///
/// A sample record is represented by a positive integer value, which
/// indicates how frequently was the associated line location executed.
///
/// Additionally, if the associated location contains a function call,
-/// the record will hold a list of all the possible called targets. For
-/// direct calls, this will be the exact function being invoked. For
-/// indirect calls (function pointers, virtual table dispatch), this
-/// will be a list of one or more functions.
+/// the record will hold a list of all the possible called targets and the types
+/// for virtual table dispatches. For direct calls, this will be the exact
+/// function being invoked. For indirect calls (function pointers, virtual table
+/// dispatch), this will be a list of one or more functions. For virtual table
+/// dispatches, this record will also hold the type of the object.
class SampleRecord {
public:
using CallTarget = std::pair<FunctionId, uint64_t>;
@@ -746,6 +765,7 @@ using BodySampleMap = std::map<LineLocation, SampleRecord>;
// memory, which is *very* significant for large profiles.
using FunctionSamplesMap = std::map<FunctionId, FunctionSamples>;
using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
+using CallsiteTypeMap = std::map<LineLocation, TypeCountMap>;
using LocToLocMap =
std::unordered_map<LineLocation, LineLocation, LineLocationHash>;
@@ -939,6 +959,14 @@ public:
return &Iter->second;
}
+ /// Returns the TypeCountMap for inlined callsites at the given \p Loc.
+ const TypeCountMap *findCallsiteTypeSamplesAt(const LineLocation &Loc) const {
+ auto Iter = VirtualCallsiteTypeCounts.find(mapIRLocToProfileLoc(Loc));
+ if (Iter == VirtualCallsiteTypeCounts.end())
+ return nullptr;
+ return &Iter->second;
+ }
+
/// Returns a pointer to FunctionSamples at the given callsite location
/// \p Loc with callee \p CalleeName. If no callsite can be found, relax
/// the restriction to return the FunctionSamples at callsite location
@@ -1000,6 +1028,46 @@ public:
return CallsiteSamples;
}
+ /// Returns vtable access samples for the C++ types collected in this
+ /// function.
+ const CallsiteTypeMap &getCallsiteTypeCounts() const {
+ return VirtualCallsiteTypeCounts;
+ }
+
+ /// Returns the vtable access samples for the C++ types for \p Loc.
+ /// Under the hood, the caller-specified \p Loc will be un-drifted before the
+ /// type sample lookup if possible.
+ TypeCountMap &getTypeSamplesAt(const LineLocation &Loc) {
+ return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)];
+ }
+
+ /// Scale \p Other sample counts by \p Weight and add the scaled result to the
+ /// type samples for \p Loc. Under the hoold, the caller-provided \p Loc will
+ /// be un-drifted before the type sample lookup if possible.
+ /// typename T is either a std::map or a DenseMap.
+ template <typename T>
+ sampleprof_error addCallsiteVTableTypeProfAt(const LineLocation &Loc,
+ const T &Other,
+ uint64_t Weight = 1) {
+ static_assert((std::is_same_v<typename T::key_type, StringRef> ||
+ std::is_same_v<typename T::key_type, FunctionId>) &&
+ std::is_same_v<typename T::mapped_type, uint64_t>,
+ "T must be a map with StringRef or FunctionId as key and "
+ "uint64_t as value");
+ TypeCountMap &TypeCounts = getTypeSamplesAt(Loc);
+ bool Overflowed = false;
+
+ for (const auto [Type, Count] : Other) {
+ FunctionId TypeId(Type);
+ bool RowOverflow = false;
+ TypeCounts[TypeId] = SaturatingMultiplyAdd(
+ Count, Weight, TypeCounts[TypeId], &RowOverflow);
+ Overflowed |= RowOverflow;
+ }
+ return Overflowed ? sampleprof_error::counter_overflow
+ : sampleprof_error::success;
+ }
+
/// Return the maximum of sample counts in a function body. When SkipCallSite
/// is false, which is the default, the return count includes samples in the
/// inlined functions. When SkipCallSite is true, the return count only
@@ -1054,6 +1122,10 @@ public:
mergeSampleProfErrors(Result,
FSMap[Rec.first].merge(Rec.second, Weight));
}
+ for (const auto &[Loc, OtherTypeMap] : Other.getCallsiteTypeCounts())
+ mergeSampleProfErrors(
+ Result, addCallsiteVTableTypeProfAt(Loc, OtherTypeMap, Weight));
+
return Result;
}
@@ -1297,6 +1369,23 @@ private:
/// collected in the call to baz() at line offset 8.
CallsiteSampleMap CallsiteSamples;
+ /// Map a virtual callsite to the list of accessed vtables and vtable counts.
+ /// The callsite is referenced by its source location.
+ ///
+ /// For example, given:
+ ///
+ /// void foo() {
+ /// ...
+ /// 5 inlined_vcall_bar();
+ /// ...
+ /// 5 inlined_vcall_baz();
+ /// ...
+ /// 200 inlined_vcall_qux();
+ /// }
+ /// This map will contain two entries. One with two types for line offset 5
+ /// and one with one type for line offset 200.
+ CallsiteTypeMap VirtualCallsiteTypeCounts;
+
/// IR to profile location map generated by stale profile matching.
///
/// Each entry is a mapping from the location on current build to the matched
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index bfe079f..799938a 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -589,6 +589,10 @@ protected:
/// Whether the function profiles use FS discriminators.
bool ProfileIsFS = false;
+ /// If true, the profile has vtable profiles and reader should decode them
+ /// to parse profiles correctly.
+ bool ReadVTableProf = false;
+
/// \brief The format of sample.
SampleProfileFormat Format = SPF_None;
@@ -703,6 +707,14 @@ protected:
/// otherwise same as readStringFromTable, also return its hash value.
ErrorOr<std::pair<SampleContext, uint64_t>> readSampleContextFromTable();
+ /// Read all virtual functions' vtable access counts for \p FProfile.
+ std::error_code readCallsiteVTableProf(FunctionSamples &FProfile);
+
+ /// Read bytes from the input buffer pointed to by `Data` and decode them into
+ /// \p M. `Data` will be advanced to the end of the read bytes when this
+ /// function returns. Returns error if any.
+ std::error_code readVTableTypeCountMap(TypeCountMap &M);
+
/// Points to the current location in the buffer.
const uint8_t *Data = nullptr;
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index e84b209..9dbeaf5 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -217,13 +217,20 @@ protected:
std::error_code writeBody(const FunctionSamples &S);
inline void stablizeNameTable(MapVector<FunctionId, uint32_t> &NameTable,
std::set<FunctionId> &V);
-
+
MapVector<FunctionId, uint32_t> NameTable;
-
+
void addName(FunctionId FName);
virtual void addContext(const SampleContext &Context);
void addNames(const FunctionSamples &S);
+ /// Write \p CallsiteTypeMap to the output stream \p OS.
+ std::error_code
+ writeCallsiteVTableProf(const CallsiteTypeMap &CallsiteTypeMap,
+ raw_ostream &OS);
+
+ bool WriteVTableProf = false;
+
private:
LLVM_ABI friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
@@ -412,8 +419,7 @@ private:
class LLVM_ABI SampleProfileWriterExtBinary
: public SampleProfileWriterExtBinaryBase {
public:
- SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS)
- : SampleProfileWriterExtBinaryBase(OS) {}
+ SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS);
private:
std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap);
diff --git a/llvm/include/llvm/Support/DXILABI.h b/llvm/include/llvm/Support/DXILABI.h
index 307a1d1..e6600c3 100644
--- a/llvm/include/llvm/Support/DXILABI.h
+++ b/llvm/include/llvm/Support/DXILABI.h
@@ -102,7 +102,6 @@ const unsigned MinWaveSize = 4;
const unsigned MaxWaveSize = 128;
LLVM_ABI StringRef getResourceClassName(ResourceClass RC);
-
} // namespace dxil
} // namespace llvm
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h
index a7795d4..b73f2d7 100644
--- a/llvm/include/llvm/Support/Debug.h
+++ b/llvm/include/llvm/Support/Debug.h
@@ -44,11 +44,6 @@ class raw_ostream;
/// level, return false.
LLVM_ABI bool isCurrentDebugType(const char *Type, int Level = 0);
-/// Overload allowing to swap the order of the Type and Level arguments.
-LLVM_ABI inline bool isCurrentDebugType(int Level, const char *Type) {
- return isCurrentDebugType(Type, Level);
-}
-
/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X
/// option were specified. Note that DebugFlag also needs to be set to true for
/// debug output to be produced.
diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h
index dce706e..c1e1648 100644
--- a/llvm/include/llvm/Support/DebugLog.h
+++ b/llvm/include/llvm/Support/DebugLog.h
@@ -19,52 +19,55 @@
namespace llvm {
#ifndef NDEBUG
-// LDBG() is a macro that can be used as a raw_ostream for debugging.
-// It will stream the output to the dbgs() stream, with a prefix of the
-// debug type and the file and line number. A trailing newline is added to the
-// output automatically. If the streamed content contains a newline, the prefix
-// is added to each beginning of a new line. Nothing is printed if the debug
-// output is not enabled or the debug type does not match.
-//
-// E.g.,
-// LDBG() << "Bitset contains: " << Bitset;
-// is somehow equivalent to
-// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" <<
-// __LINE__ << " "
-// << "Bitset contains: " << Bitset << "\n");
-//
+/// LDBG() is a macro that can be used as a raw_ostream for debugging.
+/// It will stream the output to the dbgs() stream, with a prefix of the
+/// debug type and the file and line number. A trailing newline is added to the
+/// output automatically. If the streamed content contains a newline, the prefix
+/// is added to each beginning of a new line. Nothing is printed if the debug
+/// output is not enabled or the debug type does not match.
+///
+/// E.g.,
+/// LDBG() << "Bitset contains: " << Bitset;
+/// is equivalent to
+/// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" <<
+/// __LINE__ << " "
+/// << "Bitset contains: " << Bitset << "\n");
+///
// An optional `level` argument can be provided to control the verbosity of the
-// output. The default level is 1, and is in increasing level of verbosity.
-//
-// The `level` argument can be a literal integer, or a macro that evaluates to
-// an integer.
-//
-// An optional `type` argument can be provided to control the debug type. The
-// default type is DEBUG_TYPE. The `type` argument can be a literal string, or a
-// macro that evaluates to a string.
+/// output. The default level is 1, and is in increasing level of verbosity.
+///
+/// The `level` argument can be a literal integer, or a macro that evaluates to
+/// an integer.
+///
+/// An optional `type` argument can be provided to control the debug type. The
+/// default type is DEBUG_TYPE. The `type` argument can be a literal string, or
+/// a macro that evaluates to a string.
+///
+/// E.g.,
+/// LDBG(2) << "Bitset contains: " << Bitset;
+/// LDBG("debug_type") << "Bitset contains: " << Bitset;
+/// LDBG("debug_type", 2) << "Bitset contains: " << Bitset;
#define LDBG(...) _GET_LDBG_MACRO(__VA_ARGS__)(__VA_ARGS__)
-// Helper macros to choose the correct macro based on the number of arguments.
-#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3
-#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \
- LDBG_FUNC_CHOOSER argsWithParentheses
-#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \
- LDBG_FUNC_RECOMPOSER( \
- (__VA_ARGS__, LDBG_LOG_LEVEL_WITH_TYPE, LDBG_LOG_LEVEL, ))
-#define LDBG_NO_ARG_EXPANDER() , , LDBG_LOG_LEVEL_1
-#define _GET_LDBG_MACRO(...) \
- LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__())
-
-// Dispatch macros to support the `level` argument or none (default to 1)
-#define LDBG_LOG_LEVEL(LEVEL) \
- DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), LEVEL, DEBUG_TYPE)
-#define LDBG_LOG_LEVEL_1() LDBG_LOG_LEVEL(1)
-// This macro is a helper when LDBG() is called with 2 arguments.
-// In this case we want to allow the order of the arguments to be swapped.
-// We rely on the fact that the `level` argument is an integer, and the `type`
-// is a string and dispatch to a C++ API that is overloaded.
-#define LDBG_LOG_LEVEL_WITH_TYPE(LEVEL_OR_TYPE, TYPE_OR_LEVEL) \
- DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), (LEVEL_OR_TYPE), (TYPE_OR_LEVEL))
+/// LDBG_OS() is a macro that behaves like LDBG() but instead of directly using
+/// it to stream the output, it takes a callback function that will be called
+/// with a raw_ostream.
+/// This is useful when you need to pass a `raw_ostream` to a helper function to
+/// be able to print (when the `<<` operator is not available).
+///
+/// E.g.,
+/// LDBG_OS([&] (raw_ostream &Os) {
+/// Os << "Pass Manager contains: ";
+/// pm.printAsTextual(Os);
+/// });
+///
+/// Just like LDBG(), it optionally accepts a `level` and `type` arguments.
+/// E.g.,
+/// LDBG_OS(2, [&] (raw_ostream &Os) { ... });
+/// LDBG_OS("debug_type", [&] (raw_ostream &Os) { ... });
+/// LDBG_OS("debug_type", 2, [&] (raw_ostream &Os) { ... });
+///
+#define LDBG_OS(...) _GET_LDBG_OS_MACRO(__VA_ARGS__)(__VA_ARGS__)
// We want the filename without the full path. We are using the __FILE__ macro
// and a constexpr function to strip the path prefix. We can avoid the frontend
@@ -76,22 +79,168 @@ namespace llvm {
#define __LLVM_FILE_NAME__ ::llvm::impl::getShortFileName(__FILE__)
#endif
-#define DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, \
- LINE) \
- for (bool _c = \
- (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \
+// Everything below are implementation details of the macros above.
+namespace impl {
+
+/// This macro expands to the stream to use for output, we use a macro to allow
+/// unit-testing to override.
+#define LDBG_STREAM ::llvm::dbgs()
+
+// ----------------------------------------------------------------------------
+// LDBG() implementation
+// ----------------------------------------------------------------------------
+
+// Helper macros to choose the correct LDBG() macro based on the number of
+// arguments.
+#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3
+#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \
+ LDBG_FUNC_CHOOSER argsWithParentheses
+#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \
+ LDBG_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_TYPE_AND_LEVEL, LDBG_LEVEL_OR_TYPE, ))
+#define LDBG_NO_ARG_EXPANDER() , , LDBG_NO_ARG
+#define _GET_LDBG_MACRO(...) \
+ LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__())
+
+/// This macro is the core of the LDBG() implementation. It is used to print the
+/// debug output with the given stream, level, type, file, and line number.
+#define LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, \
+ TYPE_OR_LEVEL, FILE, LINE) \
+ for (bool _c = ::llvm::DebugFlag && ::llvm::impl::ldbgIsCurrentDebugType( \
+ TYPE_OR_LEVEL, LEVEL_OR_TYPE); \
_c; _c = false) \
- for (::llvm::impl::raw_ldbg_ostream LdbgOS{ \
- ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)}; \
- _c; _c = false) \
- ::llvm::impl::RAIINewLineStream{LdbgOS}.asLvalue()
+ ::llvm::impl::raw_ldbg_ostream{ \
+ ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \
+ (STREAM), /*ShouldPrefixNextString=*/true, \
+ /*ShouldEmitNewLineOnDestruction=*/true} \
+ .asLvalue()
-#define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \
- DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, __LINE__)
-#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, LEVEL, TYPE) \
- DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, __LLVM_FILE_NAME__)
+/// These macros are helpers to implement LDBG() with an increasing amount of
+/// optional arguments made explicit.
+#define LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \
+ FILE) \
+ LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \
+ FILE, __LINE__)
+#define LDGB_STREAM_LEVEL_AND_TYPE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL) \
+ LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \
+ __LLVM_FILE_NAME__)
+/// This macro is a helper when LDBG() is called with 2 arguments.
+/// In this case we want to force the first argument to be the type for
+/// consistency in the codebase.
+/// We trick this by casting the first argument to a (const char *) which
+/// won't compile with an int.
+#define LDBG_TYPE_AND_LEVEL(TYPE, LEVEL) \
+ LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, static_cast<const char *>(TYPE), \
+ (LEVEL))
-namespace impl {
+/// When a single argument is provided. This can be either a level or the debug
+/// type. If a level is provided, we default the debug type to DEBUG_TYPE, if a
+/// string is provided, we default the level to 1.
+#define LDBG_LEVEL_OR_TYPE(LEVEL_OR_TYPE) \
+ LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, (LEVEL_OR_TYPE), \
+ LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE))
+#define LDBG_NO_ARG() LDBG_LEVEL_OR_TYPE(1)
+
+// ----------------------------------------------------------------------------
+// LDBG_OS() implementation
+// ----------------------------------------------------------------------------
+
+// Helper macros to choose the correct LDBG_OS() macro based on the number of
+// arguments.
+#define LDBG_OS_FUNC_CHOOSER(_f1, _f2, _f3, _f4, ...) _f4
+#define LDBG_OS_FUNC_RECOMPOSER(argsWithParentheses) \
+ LDBG_OS_FUNC_CHOOSER argsWithParentheses
+#define LDBG_OS_CHOOSE_FROM_ARG_COUNT(...) \
+ LDBG_OS_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK, \
+ LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK, \
+ LDBG_OS_CALLBACK, ))
+#define LDBG_OS_NO_ARG_EXPANDER() , , , LDBG_OS_CALLBACK
+#define _GET_LDBG_OS_MACRO(...) \
+ LDBG_OS_CHOOSE_FROM_ARG_COUNT(LDBG_OS_NO_ARG_EXPANDER __VA_ARGS__())
+
+/// This macro is the core of the LDBG_OS() macros. It is used to print the
+/// debug output with the given stream, level, type, file, and line number.
+#define LDBG_OS_IMPL(TYPE_OR_LEVEL, LEVEL_OR_TYPE, CALLBACK, STREAM, FILE, \
+ LINE) \
+ if (::llvm::DebugFlag && \
+ ::llvm::impl::ldbgIsCurrentDebugType(TYPE_OR_LEVEL, LEVEL_OR_TYPE)) { \
+ ::llvm::impl::raw_ldbg_ostream LdbgOS{ \
+ ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \
+ (STREAM), /*ShouldPrefixNextString=*/true, \
+ /*ShouldEmitNewLineOnDestruction=*/true}; \
+ CALLBACK(LdbgOS); \
+ }
+
+#define LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK(TYPE, LEVEL, CALLBACK) \
+ LDBG_OS_IMPL(static_cast<const char *>(TYPE), LEVEL, CALLBACK, LDBG_STREAM, \
+ __LLVM_FILE_NAME__, __LINE__)
+#define LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(LEVEL_OR_TYPE, CALLBACK) \
+ LDBG_OS_IMPL(LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE), LEVEL_OR_TYPE, \
+ CALLBACK, LDBG_STREAM, __LLVM_FILE_NAME__, __LINE__)
+#define LDBG_OS_CALLBACK(CALLBACK) \
+ LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(1, CALLBACK)
+
+// ----------------------------------------------------------------------------
+// General Helpers for the implementation above
+// ----------------------------------------------------------------------------
+
+/// Return the stringified macro as a StringRef.
+/// Also, strip out potential surrounding quotes: this comes from an artifact of
+/// the macro stringification, if DEBUG_TYPE is undefined we get the string
+/// "DEBUG_TYPE", however if it is defined we get the string with the quotes.
+/// For example if DEBUG_TYPE is "foo", we get "\"foo\"" but we want to return
+/// "foo" here.
+constexpr ::llvm::StringRef strip_quotes(const char *Str) {
+ ::llvm::StringRef S(Str);
+ if (Str[0] == '"' && Str[S.size() - 1] == '"')
+ return StringRef(Str + 1, S.size() - 2);
+ return S;
+}
+
+/// Fail compilation if DEBUG_TYPE is not defined.
+/// This is a workaround for GCC <=12 and clang <=16 which do not support
+/// static_assert in templated constexpr functions.
+#if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 12) || \
+ (defined(__clang__) && __clang_major__ <= 16)
+#define MISSING_DEBUG_TYPE() \
+ extern void missing_DEBUG_TYPE(void); \
+ missing_DEBUG_TYPE();
+#else
+#define MISSING_DEBUG_TYPE() static_assert(false, "DEBUG_TYPE is not defined");
+#endif
+
+/// Helper to provide the default level (=1) or type (=DEBUG_TYPE). This is used
+/// when a single argument is passed to LDBG() (or LDBG_OS()), if it is an
+/// integer we return DEBUG_TYPE and if it is a string we return 1. This fails
+/// with a static_assert if we pass an integer and DEBUG_TYPE is not defined.
+#define LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE) \
+ [](auto LevelOrType) { \
+ if constexpr (std::is_integral_v<decltype(LevelOrType)>) { \
+ constexpr const char *DebugType = LDBG_GET_DEBUG_TYPE_STR(); \
+ if constexpr (DebugType[0] == '"') { \
+ return ::llvm::impl::strip_quotes(DebugType); \
+ } else { \
+ MISSING_DEBUG_TYPE(); \
+ } \
+ } else { \
+ return 1; \
+ } \
+ }(LEVEL_OR_TYPE)
+
+/// Helpers to get DEBUG_TYPE as a StringRef, even when DEBUG_TYPE is not
+/// defined (in which case it expands to "DEBUG_TYPE")
+#define LDBG_GET_DEBUG_TYPE_STR__(X) #X
+#define LDBG_GET_DEBUG_TYPE_STR_(X) LDBG_GET_DEBUG_TYPE_STR__(X)
+#define LDBG_GET_DEBUG_TYPE_STR() LDBG_GET_DEBUG_TYPE_STR_(DEBUG_TYPE)
+
+/// Helper to call isCurrentDebugType with a StringRef.
+static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(StringRef Type,
+ int Level) {
+ return ::llvm::isCurrentDebugType(Type.str().c_str(), Level);
+}
+static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(int Level,
+ StringRef Type) {
+ return ::llvm::isCurrentDebugType(Type.str().c_str(), Level);
+}
/// A raw_ostream that tracks `\n` and print the prefix after each
/// newline.
@@ -99,6 +248,7 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream {
std::string Prefix;
raw_ostream &Os;
bool ShouldPrefixNextString;
+ bool ShouldEmitNewLineOnDestruction;
/// Split the line on newlines and insert the prefix before each
/// newline. Forward everything to the underlying stream.
@@ -131,12 +281,17 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream {
public:
explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os,
- bool ShouldPrefixNextString = true)
+ bool ShouldPrefixNextString = true,
+ bool ShouldEmitNewLineOnDestruction = false)
: Prefix(std::move(Prefix)), Os(Os),
- ShouldPrefixNextString(ShouldPrefixNextString) {
+ ShouldPrefixNextString(ShouldPrefixNextString),
+ ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) {
SetUnbuffered();
}
- ~raw_ldbg_ostream() final {}
+ ~raw_ldbg_ostream() final {
+ if (ShouldEmitNewLineOnDestruction)
+ Os << '\n';
+ }
/// Forward the current_pos method to the underlying stream.
uint64_t current_pos() const final { return Os.tell(); }
@@ -173,17 +328,17 @@ getShortFileName(const char *path) {
/// "[DebugType] File:Line "
/// Where the File is the file name without the path prefix.
static LLVM_ATTRIBUTE_UNUSED std::string
-computePrefix(const char *DebugType, const char *File, int Line, int Level) {
+computePrefix(StringRef DebugType, const char *File, int Line, int Level) {
std::string Prefix;
raw_string_ostream OsPrefix(Prefix);
- if (DebugType)
+ if (!DebugType.empty())
OsPrefix << "[" << DebugType << ":" << Level << "] ";
OsPrefix << File << ":" << Line << " ";
return OsPrefix.str();
}
/// Overload allowing to swap the order of the DebugType and Level arguments.
static LLVM_ATTRIBUTE_UNUSED std::string
-computePrefix(int Level, const char *File, int Line, const char *DebugType) {
+computePrefix(int Level, const char *File, int Line, StringRef DebugType) {
return computePrefix(DebugType, File, Line, Level);
}
@@ -194,6 +349,7 @@ computePrefix(int Level, const char *File, int Line, const char *DebugType) {
#define LDBG(...) \
for (bool _c = false; _c; _c = false) \
::llvm::nulls()
+#define LDBG_OS(...)
#endif
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h
index 02a3194e..7eb1d7e8 100644
--- a/llvm/include/llvm/Support/Endian.h
+++ b/llvm/include/llvm/Support/Endian.h
@@ -96,9 +96,8 @@ inline void write(void *memory, value_type value, endianness endian) {
&value, sizeof(value_type));
}
-template<typename value_type,
- endianness endian,
- std::size_t alignment>
+template <typename value_type, endianness endian, std::size_t alignment>
+LLVM_DEPRECATED("Pass endian as a function argument instead", "write")
inline void write(void *memory, value_type value) {
write<value_type, alignment>(memory, value, endian);
}
@@ -163,7 +162,7 @@ inline void writeAtBitAlignment(void *memory, value_type value,
uint64_t startBit) {
assert(startBit < 8);
if (startBit == 0)
- write<value_type, endian, alignment>(memory, value);
+ write<value_type, alignment>(memory, value, endian);
else {
// Read two values and shift the result into them.
value_type val[2];
@@ -230,8 +229,8 @@ struct packed_endian_specific_integral {
operator value_type() const { return value(); }
void operator=(value_type newValue) {
- endian::write<value_type, endian, alignment>(
- (void*)Value.buffer, newValue);
+ endian::write<value_type, alignment>((void *)Value.buffer, newValue,
+ endian);
}
packed_endian_specific_integral &operator+=(value_type newValue) {
@@ -268,7 +267,7 @@ public:
}
void operator=(value_type NewValue) {
- endian::write<value_type, endian, alignment>(Ptr, NewValue);
+ endian::write<value_type, alignment>(Ptr, NewValue, endian);
}
private:
diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
index 0971108..17fbc3f 100644
--- a/llvm/include/llvm/Support/HashBuilder.h
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -366,18 +366,16 @@ private:
HashBuilder &addRangeElementsImpl(ForwardIteratorT First,
ForwardIteratorT Last,
std::forward_iterator_tag) {
- for (auto It = First; It != Last; ++It)
- add(*It);
- return *this;
- }
-
- template <typename T>
- std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
- Endianness == llvm::endianness::native,
- HashBuilder &>
- addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
- this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
- (Last - First) * sizeof(T)));
+ using T = typename std::iterator_traits<ForwardIteratorT>::value_type;
+ if constexpr (std::is_pointer_v<ForwardIteratorT> &&
+ hashbuilder_detail::IsHashableData<T>::value &&
+ Endianness == llvm::endianness::native) {
+ this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
+ (Last - First) * sizeof(T)));
+ } else {
+ for (auto It = First; It != Last; ++It)
+ add(*It);
+ }
return *this;
}
};
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index ebe329a..7bff13d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6474,6 +6474,10 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
const CallBase *Call) {
unsigned BitWidth = ReturnType->getScalarSizeInBits();
switch (IID) {
+ case Intrinsic::get_active_lane_mask:
+ if (match(Op1, m_Zero()))
+ return ConstantInt::getFalse(ReturnType);
+ break;
case Intrinsic::abs:
// abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here.
// It is always ok to pick the earlier abs. We'll just lose nsw if its only
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index ebb8630..a1703a2 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3217,26 +3217,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// Try to fold (C1 * D /u C2) -> C1/C2 * D, if C1 and C2 are powers-of-2,
- // D is a multiple of C2, and C1 is a multiple of C2. If C2 is a multiple
- // of C1, fold to (D /u (C2 /u C1)).
+ // D is a multiple of C2, and C1 is a multiple of C2.
const SCEV *D;
APInt C1V = LHSC->getAPInt();
- // (C1 * D /u C2) == -1 * -C1 * D /u C2 when C1 != INT_MIN. Don't treat -1
- // as -1 * 1, as it won't enable additional folds.
- if (C1V.isNegative() && !C1V.isMinSignedValue() && !C1V.isAllOnes())
+ // (C1 * D /u C2) == -1 * -C1 * D /u C2 when C1 != INT_MIN.
+ if (C1V.isNegative() && !C1V.isMinSignedValue())
C1V = C1V.abs();
const SCEVConstant *C2;
if (C1V.isPowerOf2() &&
match(Ops[1], m_scev_UDiv(m_SCEV(D), m_SCEVConstant(C2))) &&
- C2->getAPInt().isPowerOf2() &&
+ C2->getAPInt().isPowerOf2() && C1V.uge(C2->getAPInt()) &&
C1V.logBase2() <= getMinTrailingZeros(D)) {
- const SCEV *NewMul;
- if (C1V.uge(C2->getAPInt())) {
- NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
- } else {
- assert(C1V.ugt(1) && "C1 <= 1 should have been folded earlier");
- NewMul = getUDivExpr(D, getUDivExpr(C2, getConstant(C1V)));
- }
+ const SCEV *NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
return C1V == LHSC->getAPInt() ? NewMul : getNegativeSCEV(NewMul);
}
}
@@ -15457,6 +15449,12 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx);
if (!VisitedBlocks.insert(InBlock).second)
return {nullptr, scCouldNotCompute};
+
+ // Avoid analyzing unreachable blocks so that we don't get trapped
+ // traversing cycles with ill-formed dominance or infinite cycles
+ if (!SE.DT.isReachableFromEntry(InBlock))
+ return {nullptr, scCouldNotCompute};
+
auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE));
if (Inserted)
collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks,
@@ -15511,6 +15509,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks, unsigned Depth) {
+
+ assert(SE.DT.isReachableFromEntry(Block) && SE.DT.isReachableFromEntry(Pred));
+
SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 899806b..09b50c5 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const {
return TTIImpl->preferEpilogueVectorization();
}
+bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const {
+ return TTIImpl->shouldConsiderVectorizationRegPressure();
+}
+
TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 755be08..e0f80b0 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -395,7 +395,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// Note register reference...
const TargetRegisterClass *RC = nullptr;
if (i < MI.getDesc().getNumOperands())
- RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ RC = TII->getRegClass(MI.getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.emplace(Reg.asMCReg(), RR);
}
@@ -479,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
// Note register reference...
const TargetRegisterClass *RC = nullptr;
if (i < MI.getDesc().getNumOperands())
- RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ RC = TII->getRegClass(MI.getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.emplace(Reg.asMCReg(), RR);
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 7eef4a9..205020a 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -133,8 +133,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
}
// Get the undef operand's register class
- const TargetRegisterClass *OpRC =
- TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+ const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx, TRI);
assert(OpRC && "Not a valid register class");
// If the instruction has a true dependency, we can hide the false depdency
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index e8581f63..f873616 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
const TargetRegisterClass *NewRC = nullptr;
if (i < MI.getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -316,7 +316,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
const TargetRegisterClass *NewRC = nullptr;
if (i < MI.getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 541269a..768e3713 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1863,7 +1863,7 @@ bool IRTranslator::translateVectorDeinterleave2Intrinsic(
void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
+ MRI->setRegClass(DstReg, TRI->getPointerRegClass());
auto MIB =
MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1ccc549..055fdc6 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -114,7 +114,7 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Reg.isVirtual() && "PhysReg not implemented");
- const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI);
// Some of the target independent instructions, like COPY, may not impose any
// register class constraints on some of their operands: If it's a use, we can
// skip constraining as the instruction defining the register would constrain
diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
index 500a73b..e07e598 100644
--- a/llvm/lib/CodeGen/InitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -232,7 +232,7 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
MachineOperand &UseMO = MI.getOperand(UseOpIdx);
if (UseMO.getReg() == MCRegister::NoRegister) {
const TargetRegisterClass *RC =
- TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI);
Register NewDest = MRI->createVirtualRegister(RC);
// We don't have a way to update dead lanes, so keep track of the
// new register so that we avoid querying it later.
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 79047f7..2c06c5a 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -976,11 +976,9 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetRegisterInfo *TRI) const {
assert(getParent() && "Can't have an MBB reference here!");
assert(getMF() && "Can't have an MF reference here!");
- const MachineFunction &MF = *getMF();
-
// Most opcodes have fixed constraints in their MCInstrDesc.
if (!isInlineAsm())
- return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+ return TII->getRegClass(getDesc(), OpIdx, TRI);
if (!getOperand(OpIdx).isReg())
return nullptr;
@@ -1003,7 +1001,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
// Assume that all registers in a memory operand are pointers.
if (F.isMemKind())
- return TRI->getPointerRegClass(MF);
+ return TRI->getPointerRegClass();
return nullptr;
}
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 286fbfd..4f164e2 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1420,7 +1420,7 @@ MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,
if (NewOpc == 0) return nullptr;
const MCInstrDesc &MID = TII->get(NewOpc);
MachineFunction &MF = *MI->getMF();
- const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
Register Reg = MRI->createVirtualRegister(RC);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 01703fe..2b24fe4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -2636,7 +2636,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (MONum < MCID.getNumOperands()) {
if (const TargetRegisterClass *DRC =
- TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ TII->getRegClass(MCID, MONum, TRI)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
OS << printReg(Reg, TRI) << " is not a "
@@ -2721,11 +2721,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// comply to it.
if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
MONum < MCID.getNumOperands() &&
- TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ TII->getRegClass(MCID, MONum, TRI)) {
report("Virtual register does not match instruction constraint", MO,
MONum);
OS << "Expect register class "
- << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI, *MF))
+ << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI))
<< " but got nothing\n";
return;
}
@@ -2752,7 +2752,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (MONum < MCID.getNumOperands()) {
if (const TargetRegisterClass *DRC =
- TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ TII->getRegClass(MCID, MONum, TRI)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(RC, *MF);
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 514f2f0..b8486f6 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1374,7 +1374,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
- const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI);
if (!DefMI->isImplicitDef()) {
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9467ba1..851d445 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1964,8 +1964,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) {
Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
unsigned OpNum) {
if (Op.isVirtual()) {
- const TargetRegisterClass *RegClass =
- TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI);
if (!MRI.constrainRegClass(Op, RegClass)) {
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 861f76e..11bc64c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -125,7 +125,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
const TargetRegisterClass *RC = nullptr;
if (i + II.getNumDefs() < II.getNumOperands()) {
RC = TRI->getAllocatableClass(
- TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF));
+ TII->getRegClass(II, i + II.getNumDefs(), TRI));
}
if (!UseRC)
UseRC = RC;
@@ -197,7 +197,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
// register instead of creating a new vreg.
Register VRBase;
const TargetRegisterClass *RC =
- TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI));
// Always let the value type influence the used register class. The
// constraints on the instruction may be too lax to represent the value
// type correctly. For example, a 64-bit float (X86::FR64) can't live in
@@ -330,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
if (II) {
const TargetRegisterClass *OpRC = nullptr;
if (IIOpNum < II->getNumOperands())
- OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
+ OpRC = TII->getRegClass(*II, IIOpNum, TRI);
if (OpRC) {
unsigned MinNumRegs = MinRCSize;
@@ -409,7 +409,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op,
Register VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
const TargetRegisterClass *IIRC =
- II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI))
: nullptr;
const TargetRegisterClass *OpRC =
TLI->isTypeLegal(OpVT)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9e85f08..87570e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5254,9 +5254,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue MulLo, MulHi;
TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0),
N->getOperand(1), MulLo, MulHi);
- SDValue SRA =
- DAG.getNode(ISD::SRA, dl, VT, MulLo,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue SRA = DAG.getNode(
+ ISD::SRA, dl, VT, MulLo,
+ DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
SDValue Overflow =
DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
SplitInteger(MulLo, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index a570b71..f70b6cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned Idx = RegDefPos.GetIdx();
const MCInstrDesc &Desc = TII->get(Opcode);
- const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
assert(RC && "Not a valid register class");
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5d720fb..9b1420a 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -375,9 +375,14 @@ void TailDuplicator::processPHI(
if (!Remove)
return;
- // Remove PredBB from the PHI node.
- MI->removeOperand(SrcOpIdx + 1);
- MI->removeOperand(SrcOpIdx);
+ // MI might have multiple entries for PredBB. Need to remove them all.
+ for (unsigned N = MI->getNumOperands(); N > 2; N -= 2) {
+ if (MI->getOperand(N - 1).getMBB() == PredBB) {
+ MI->removeOperand(N - 1);
+ MI->removeOperand(N - 2);
+ }
+ }
+
if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken())
MI->eraseFromParent();
else if (MI->getNumOperands() == 1)
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 0d7b128..b0009560 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -58,16 +58,15 @@ static cl::opt<unsigned int> MaxAccumulatorWidth(
TargetInstrInfo::~TargetInstrInfo() = default;
-const TargetRegisterClass*
+const TargetRegisterClass *
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI) const {
if (OpNum >= MCID.getNumOperands())
return nullptr;
short RegClass = MCID.operands()[OpNum].RegClass;
if (MCID.operands()[OpNum].isLookupPtrRegClass())
- return TRI->getPointerRegClass(MF, RegClass);
+ return TRI->getPointerRegClass(RegClass);
// Instructions like INSERT_SUBREG do not have fixed register classes.
if (RegClass < 0)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8d94b40..414e414 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1401,9 +1401,8 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
if (UnfoldMCID.getNumDefs() == 1) {
// Unfold the load.
LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
- const TargetRegisterClass *RC =
- TRI->getAllocatableClass(
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
+ const TargetRegisterClass *RC = TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI));
Register Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 5e87b5f..c1fafd7 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -553,16 +553,17 @@ void Instruction::dropUBImplyingAttrsAndUnknownMetadata(
}
void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef<unsigned> Keep) {
- // !annotation metadata does not impact semantics.
+ // !annotation and !prof metadata does not impact semantics.
// !range, !nonnull and !align produce poison, so they are safe to speculate.
// !noundef and various AA metadata must be dropped, as it generally produces
// immediate undefined behavior.
static const unsigned KnownIDs[] = {
LLVMContext::MD_annotation, LLVMContext::MD_range,
- LLVMContext::MD_nonnull, LLVMContext::MD_align};
+ LLVMContext::MD_nonnull, LLVMContext::MD_align, LLVMContext::MD_prof};
SmallVector<unsigned> KeepIDs;
KeepIDs.reserve(Keep.size() + std::size(KnownIDs));
- append_range(KeepIDs, KnownIDs);
+ append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs
+ : drop_end(KnownIDs)));
append_range(KeepIDs, Keep);
dropUBImplyingAttrsAndUnknownMetadata(KeepIDs);
}
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 58a1f74..4d2e8fa 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -740,14 +740,6 @@ Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) {
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_ATTRIBUTES
-AttributeSet Intrinsic::getFnAttributes(LLVMContext &C, ID id) {
- if (id == 0)
- return AttributeSet();
- uint16_t PackedID = IntrinsicsToAttributesMap[id - 1];
- uint8_t FnAttrID = PackedID >> 8;
- return getIntrinsicFnAttributeSet(C, FnAttrID);
-}
-
Function *Intrinsic::getOrInsertDeclaration(Module *M, ID id,
ArrayRef<Type *> Tys) {
// There can never be multiple globals with the same name of different types,
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index fb183a1..5fa1539 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3876,20 +3876,15 @@ bool AsmParser::parseDirectiveCVLoc() {
/// ::= .cv_linetable FunctionId, FnStart, FnEnd
bool AsmParser::parseDirectiveCVLinetable() {
int64_t FunctionId;
- StringRef FnStartName, FnEndName;
+ MCSymbol *FnStartSym, *FnEndSym;
SMLoc Loc = getTok().getLoc();
if (parseCVFunctionId(FunctionId, ".cv_linetable") || parseComma() ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnStartName), Loc,
- "expected identifier in directive") ||
+ check(parseSymbol(FnStartSym), Loc, "expected identifier in directive") ||
parseComma() || parseTokenLoc(Loc) ||
- check(parseIdentifier(FnEndName), Loc,
- "expected identifier in directive"))
+ check(parseSymbol(FnEndSym), Loc, "expected identifier in directive"))
return true;
- MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
- MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
-
getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
return false;
}
@@ -3898,7 +3893,7 @@ bool AsmParser::parseDirectiveCVLinetable() {
/// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
bool AsmParser::parseDirectiveCVInlineLinetable() {
int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
- StringRef FnStartName, FnEndName;
+ MCSymbol *FnStartSym, *FnEndSym;
SMLoc Loc = getTok().getLoc();
if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
parseTokenLoc(Loc) ||
@@ -3908,16 +3903,14 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
parseIntToken(SourceLineNum, "expected SourceLineNum") ||
check(SourceLineNum < 0, Loc, "Line number less than zero") ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnStartName), Loc, "expected identifier") ||
+ check(parseSymbol(FnStartSym), Loc, "expected identifier") ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnEndName), Loc, "expected identifier"))
+ check(parseSymbol(FnEndSym), Loc, "expected identifier"))
return true;
if (parseEOL())
return true;
- MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
- MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
SourceLineNum, FnStartSym,
FnEndSym);
@@ -3938,16 +3931,14 @@ bool AsmParser::parseDirectiveCVDefRange() {
std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
while (getLexer().is(AsmToken::Identifier)) {
Loc = getLexer().getLoc();
- StringRef GapStartName;
- if (parseIdentifier(GapStartName))
+ MCSymbol *GapStartSym;
+ if (parseSymbol(GapStartSym))
return Error(Loc, "expected identifier in directive");
- MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
Loc = getLexer().getLoc();
- StringRef GapEndName;
- if (parseIdentifier(GapEndName))
+ MCSymbol *GapEndSym;
+ if (parseSymbol(GapEndSym))
return Error(Loc, "expected identifier in directive");
- MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
Ranges.push_back({GapStartSym, GapEndSym});
}
@@ -4084,12 +4075,11 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() {
/// ::= .cv_fpo_data procsym
bool AsmParser::parseDirectiveCVFPOData() {
SMLoc DirLoc = getLexer().getLoc();
- StringRef ProcName;
- if (parseIdentifier(ProcName))
+ MCSymbol *ProcSym;
+ if (parseSymbol(ProcSym))
return TokError("expected symbol name");
if (parseEOL())
return true;
- MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
getStreamer().emitCVFPOData(ProcSym, DirLoc);
return false;
}
@@ -4311,15 +4301,12 @@ bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
if (Encoding == dwarf::DW_EH_PE_omit)
return false;
- StringRef Name;
+ MCSymbol *Sym;
if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
parseComma() ||
- check(parseIdentifier(Name), "expected identifier in directive") ||
- parseEOL())
+ check(parseSymbol(Sym), "expected identifier in directive") || parseEOL())
return true;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (IsPersonality)
getStreamer().emitCFIPersonality(Sym, Encoding);
else
@@ -4920,13 +4907,10 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
return true;
SMLoc IDLoc = getLexer().getLoc();
- StringRef Name;
- if (parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (parseComma())
return true;
@@ -5756,10 +5740,9 @@ bool AsmParser::parseDirectiveAddrsig() {
}
bool AsmParser::parseDirectiveAddrsigSym() {
- StringRef Name;
- if (check(parseIdentifier(Name), "expected identifier") || parseEOL())
+ MCSymbol *Sym;
+ if (check(parseSymbol(Sym), "expected identifier") || parseEOL())
return true;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitAddrsigSym(Sym);
return false;
}
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 9fb1748..5dd7994 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -293,13 +293,11 @@ bool COFFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
while (true) {
- StringRef Name;
+ MCSymbol *Sym;
- if (getParser().parseIdentifier(Name))
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
getStreamer().emitSymbolAttribute(Sym, Attr);
if (getLexer().is(AsmToken::EndOfStatement))
@@ -450,13 +448,11 @@ bool COFFAsmParser::parseDirectivePopSection(StringRef, SMLoc) {
}
bool COFFAsmParser::parseDirectiveDef(StringRef, SMLoc) {
- StringRef SymbolName;
+ MCSymbol *Sym;
- if (getParser().parseIdentifier(SymbolName))
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
-
getStreamer().beginCOFFSymbolDef(Sym);
Lex();
@@ -496,8 +492,8 @@ bool COFFAsmParser::parseDirectiveEndef(StringRef, SMLoc) {
}
bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
int64_t Offset = 0;
@@ -517,8 +513,6 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) {
"invalid '.secrel32' directive offset, can't be less "
"than zero or greater than std::numeric_limits<uint32_t>::max()");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSecRel32(Symbol, Offset);
return false;
@@ -526,8 +520,8 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) {
bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) {
auto parseOp = [&]() -> bool {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
int64_t Offset = 0;
@@ -544,8 +538,6 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) {
"than -2147483648 or greater than "
"2147483647");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
getStreamer().emitCOFFImgRel32(Symbol, Offset);
return false;
};
@@ -556,75 +548,65 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) {
}
bool COFFAsmParser::parseDirectiveSafeSEH(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSafeSEH(Symbol);
return false;
}
bool COFFAsmParser::parseDirectiveSecIdx(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSectionIndex(Symbol);
return false;
}
bool COFFAsmParser::parseDirectiveSymIdx(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSymbolIndex(Symbol);
return false;
}
bool COFFAsmParser::parseDirectiveSecNum(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSecNumber(Symbol);
return false;
}
bool COFFAsmParser::parseDirectiveSecOffset(StringRef, SMLoc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitCOFFSecOffset(Symbol);
return false;
@@ -679,15 +661,13 @@ bool COFFAsmParser::parseDirectiveLinkOnce(StringRef, SMLoc Loc) {
}
bool COFFAsmParser::parseSEHDirectiveStartProc(StringRef, SMLoc Loc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *Symbol;
+ if (getParser().parseSymbol(Symbol))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitWinCFIStartProc(Symbol, Loc);
return false;
@@ -718,8 +698,8 @@ bool COFFAsmParser::parseSEHDirectiveEndChained(StringRef, SMLoc Loc) {
}
bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) {
- StringRef SymbolID;
- if (getParser().parseIdentifier(SymbolID))
+ MCSymbol *handler;
+ if (getParser().parseSymbol(handler))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -736,8 +716,6 @@ bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID);
-
Lex();
getStreamer().emitWinEHHandler(handler, unwind, except, Loc);
return false;
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 1bb617b..ef2815b 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -443,8 +443,8 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
if (!getStreamer().getCurrentFragment())
return Error(getTok().getLoc(), "expected section directive");
- StringRef Label;
- if (getParser().parseIdentifier(Label))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return Error(Loc, "expected identifier for procedure");
if (getLexer().is(AsmToken::Identifier)) {
StringRef nextVal = getTok().getString();
@@ -459,12 +459,12 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
nextLoc = getTok().getLoc();
}
}
- auto *Sym =
- static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Label));
// Define symbol as simple external function
- Sym->setExternal(true);
- Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ auto *COFFSym = static_cast<MCSymbolCOFF *>(Sym);
+ COFFSym->setExternal(true);
+ COFFSym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
bool Framed = false;
if (getLexer().is(AsmToken::Identifier) &&
@@ -475,7 +475,7 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
}
getStreamer().emitLabel(Sym, Loc);
- CurrentProcedures.push_back(Label);
+ CurrentProcedures.push_back(Sym->getName());
CurrentProceduresFramed.push_back(Framed);
return false;
}
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index a9095b3..fceb718 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -501,13 +501,10 @@ bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section,
/// parseDirectiveAltEntry
/// ::= .alt_entry identifier
bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Look up symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (Sym->isDefined())
return TokError(".alt_entry must preceed symbol definition");
@@ -521,13 +518,10 @@ bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) {
/// parseDirectiveDesc
/// ::= .desc identifier , expression
bool DarwinAsmParser::parseDirectiveDesc(StringRef, SMLoc) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.desc' directive");
Lex();
@@ -560,18 +554,17 @@ bool DarwinAsmParser::parseDirectiveIndirectSymbol(StringRef, SMLoc Loc) {
return Error(Loc, "indirect symbol not in a symbol pointer or stub "
"section");
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in .indirect_symbol directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
// Assembler local symbols don't make any sense here. Complain loudly.
if (Sym->isTemporary())
return TokError("non-local symbol required in directive");
if (!getStreamer().emitSymbolAttribute(Sym, MCSA_IndirectSymbol))
- return TokError("unable to emit indirect symbol attribute for: " + Name);
+ return TokError("unable to emit indirect symbol attribute for: " +
+ Sym->getName());
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.indirect_symbol' directive");
@@ -633,13 +626,10 @@ bool DarwinAsmParser::parseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
/// parseDirectiveLsym
/// ::= .lsym identifier , expression
bool DarwinAsmParser::parseDirectiveLsym(StringRef, SMLoc) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.lsym' directive");
Lex();
@@ -826,13 +816,10 @@ bool DarwinAsmParser::parseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
/// ::= .tbss identifier, size, align
bool DarwinAsmParser::parseDirectiveTBSS(StringRef, SMLoc) {
SMLoc IDLoc = getLexer().getLoc();
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
@@ -911,13 +898,10 @@ bool DarwinAsmParser::parseDirectiveZerofill(StringRef, SMLoc) {
Lex();
SMLoc IDLoc = getLexer().getLoc();
- StringRef IDStr;
- if (getParser().parseIdentifier(IDStr))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- // handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(IDStr);
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 513f3b3..19da9f5 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -196,10 +196,9 @@ bool ELFAsmParser::parseSectionSwitch(StringRef Section, unsigned Type,
}
bool ELFAsmParser::parseDirectiveSize(StringRef, SMLoc) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier");
- auto *Sym = static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(Name));
if (getLexer().isNot(AsmToken::Comma))
return TokError("expected comma");
@@ -712,13 +711,10 @@ static MCSymbolAttr MCAttrForString(StringRef Type) {
/// ::= .type identifier , %attribute
/// ::= .type identifier , "attribute"
bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
bool AllowAt = getLexer().getAllowAtInIdentifier();
if (!AllowAt &&
!getContext().getAsmInfo()->getCommentString().starts_with("@"))
@@ -790,8 +786,9 @@ bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) {
/// parseDirectiveSymver
/// ::= .symver foo, bar2@zed
bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) {
- StringRef OriginalName, Name, Action;
- if (getParser().parseIdentifier(OriginalName))
+ MCSymbol *OriginalSym;
+ StringRef Name, Action;
+ if (getParser().parseSymbol(OriginalSym))
return TokError("expected identifier");
if (getLexer().isNot(AsmToken::Comma))
@@ -819,8 +816,7 @@ bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) {
}
(void)parseOptionalToken(AsmToken::EndOfStatement);
- getStreamer().emitELFSymverDirective(
- getContext().getOrCreateSymbol(OriginalName), Name, KeepOriginalSym);
+ getStreamer().emitELFSymverDirective(OriginalSym, Name, KeepOriginalSym);
return false;
}
@@ -853,8 +849,8 @@ bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) {
bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) {
// FIXME: Share code with the other alias building directives.
- StringRef AliasName;
- if (getParser().parseIdentifier(AliasName))
+ MCSymbol *Alias;
+ if (getParser().parseSymbol(Alias))
return TokError("expected identifier");
if (getLexer().isNot(AsmToken::Comma))
@@ -862,14 +858,10 @@ bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) {
Lex();
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier");
- MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
-
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
getStreamer().emitWeakReference(Alias, Sym);
return false;
}
diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index 68b9cab..3721541 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -163,6 +163,15 @@ bool MCAsmParser::parseGNUAttribute(SMLoc L, int64_t &Tag,
return true;
}
+bool MCAsmParser::parseSymbol(MCSymbol *&Res) {
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return true;
+
+ Res = getContext().getOrCreateSymbol(Name);
+ return false;
+}
+
void MCParsedAsmOperand::dump() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 2dcfe0f..b38c2f7 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4503,9 +4503,9 @@ bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
bool MasmParser::parseDirectiveExtern() {
// .extern is the default - but we still need to take any provided type info.
auto parseOp = [&]() -> bool {
- StringRef Name;
+ MCSymbol *Sym;
SMLoc NameLoc = getTok().getLoc();
- if (parseIdentifier(Name))
+ if (parseSymbol(Sym))
return Error(NameLoc, "expected name");
if (parseToken(AsmToken::Colon))
return true;
@@ -4518,12 +4518,10 @@ bool MasmParser::parseDirectiveExtern() {
AsmTypeInfo Type;
if (lookUpType(TypeName, Type))
return Error(TypeLoc, "unrecognized type");
- KnownType[Name.lower()] = Type;
+ KnownType[Sym->getName().lower()] = Type;
}
- auto *Sym =
- static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Name));
- Sym->setExternal(true);
+ static_cast<MCSymbolCOFF *>(Sym)->setExternal(true);
getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
return false;
@@ -4538,11 +4536,10 @@ bool MasmParser::parseDirectiveExtern() {
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
auto parseOp = [&]() -> bool {
- StringRef Name;
SMLoc Loc = getTok().getLoc();
- if (parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (parseSymbol(Sym))
return Error(Loc, "expected identifier");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
if (Sym->isTemporary())
@@ -4565,13 +4562,10 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) {
return true;
SMLoc IDLoc = getLexer().getLoc();
- StringRef Name;
- if (parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (parseSymbol(Sym))
return TokError("expected identifier in directive");
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index ddfe1e1..1befcac 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -212,10 +212,9 @@ public:
// TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
// so maybe could be shared somehow.
bool parseDirectiveSize(StringRef, SMLoc Loc) {
- StringRef Name;
- if (Parser->parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (Parser->parseSymbol(Sym))
return TokError("expected identifier in directive");
- auto Sym = getContext().getOrCreateSymbol(Name);
if (expect(AsmToken::Comma, ","))
return true;
const MCExpr *Expr;
@@ -293,10 +292,9 @@ public:
assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
while (true) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
+ MCSymbol *Sym;
+ if (getParser().parseSymbol(Sym))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, Attr);
if (getLexer().is(AsmToken::EndOfStatement))
break;
diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
index 73dfa98..9103838 100644
--- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
@@ -274,13 +274,8 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) {
for (DXContainerYAML::RootParameterLocationYaml &L :
P.RootSignature->Parameters.Locations) {
- assert(dxbc::isValidParameterType(L.Header.Type) &&
- "invalid DXContainer YAML");
- assert(dxbc::isValidShaderVisibility(L.Header.Visibility) &&
- "invalid DXContainer YAML");
- dxbc::RootParameterType Type = dxbc::RootParameterType(L.Header.Type);
- dxbc::ShaderVisibility Visibility =
- dxbc::ShaderVisibility(L.Header.Visibility);
+ const dxbc::RootParameterType Type = L.Header.Type;
+ const dxbc::ShaderVisibility Visibility = L.Header.Visibility;
switch (Type) {
case dxbc::RootParameterType::Constants32Bit: {
@@ -313,10 +308,8 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) {
P.RootSignature->Parameters.getOrInsertTable(L);
mcdxbc::DescriptorTable Table;
for (const auto &R : TableYaml.Ranges) {
- assert(dxbc::isValidRangeType(R.RangeType) &&
- "Invalid Descriptor Range Type");
mcdxbc::DescriptorRange Range;
- Range.RangeType = dxil::ResourceClass(R.RangeType);
+ Range.RangeType = R.RangeType;
Range.NumDescriptors = R.NumDescriptors;
Range.BaseShaderRegister = R.BaseShaderRegister;
Range.RegisterSpace = R.RegisterSpace;
@@ -335,30 +328,20 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) {
}
for (const auto &Param : P.RootSignature->samplers()) {
- assert(dxbc::isValidSamplerFilter(Param.Filter) &&
- dxbc::isValidAddress(Param.AddressU) &&
- dxbc::isValidAddress(Param.AddressV) &&
- dxbc::isValidAddress(Param.AddressW) &&
- dxbc::isValidComparisonFunc(Param.ComparisonFunc) &&
- dxbc::isValidBorderColor(Param.BorderColor) &&
- dxbc::isValidShaderVisibility(Param.ShaderVisibility) &&
- "Invalid enum value in static sampler");
-
mcdxbc::StaticSampler NewSampler;
- NewSampler.Filter = dxbc::SamplerFilter(Param.Filter);
- NewSampler.AddressU = dxbc::TextureAddressMode(Param.AddressU);
- NewSampler.AddressV = dxbc::TextureAddressMode(Param.AddressV);
- NewSampler.AddressW = dxbc::TextureAddressMode(Param.AddressW);
+ NewSampler.Filter = Param.Filter;
+ NewSampler.AddressU = Param.AddressU;
+ NewSampler.AddressV = Param.AddressV;
+ NewSampler.AddressW = Param.AddressW;
NewSampler.MipLODBias = Param.MipLODBias;
NewSampler.MaxAnisotropy = Param.MaxAnisotropy;
- NewSampler.ComparisonFunc = dxbc::ComparisonFunc(Param.ComparisonFunc);
- NewSampler.BorderColor = dxbc::StaticBorderColor(Param.BorderColor);
+ NewSampler.ComparisonFunc = Param.ComparisonFunc;
+ NewSampler.BorderColor = Param.BorderColor;
NewSampler.MinLOD = Param.MinLOD;
NewSampler.MaxLOD = Param.MaxLOD;
NewSampler.ShaderRegister = Param.ShaderRegister;
NewSampler.RegisterSpace = Param.RegisterSpace;
- NewSampler.ShaderVisibility =
- dxbc::ShaderVisibility(Param.ShaderVisibility);
+ NewSampler.ShaderVisibility = Param.ShaderVisibility;
RS.StaticSamplers.push_back(NewSampler);
}
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index 32b502e..22674b1 100644
--- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -60,7 +60,10 @@ readDescriptorRanges(DXContainerYAML::RootParameterHeaderYaml &Header,
NewR.NumDescriptors = R.NumDescriptors;
NewR.BaseShaderRegister = R.BaseShaderRegister;
NewR.RegisterSpace = R.RegisterSpace;
- NewR.RangeType = R.RangeType;
+ if (!dxbc::isValidRangeType(R.RangeType))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for descriptor range type");
+ NewR.RangeType = dxil::ResourceClass(R.RangeType);
if constexpr (std::is_same_v<T, dxbc::RTS0::v2::DescriptorRange>) {
// Set all flag fields for v2
#define DESCRIPTOR_RANGE_FLAG(Num, Enum, Flag) \
@@ -94,15 +97,14 @@ DXContainerYAML::RootSignatureYamlDesc::create(
return createStringError(std::errc::invalid_argument,
"Invalid value for parameter type");
- RootParameterHeaderYaml Header(PH.ParameterType);
+ RootParameterHeaderYaml Header(dxbc::RootParameterType(PH.ParameterType));
Header.Offset = PH.ParameterOffset;
- Header.Type = PH.ParameterType;
if (!dxbc::isValidShaderVisibility(PH.ShaderVisibility))
return createStringError(std::errc::invalid_argument,
"Invalid value for shader visibility");
- Header.Visibility = PH.ShaderVisibility;
+ Header.Visibility = dxbc::ShaderVisibility(PH.ShaderVisibility);
llvm::Expected<object::DirectX::RootParameterView> ParamViewOrErr =
Data.getParameter(PH);
@@ -162,20 +164,50 @@ DXContainerYAML::RootSignatureYamlDesc::create(
}
for (const auto &S : Data.samplers()) {
+ if (!dxbc::isValidSamplerFilter(S.Filter))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for static sampler filter");
+
+ if (!dxbc::isValidAddress(S.AddressU))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for static sampler AddressU");
+
+ if (!dxbc::isValidAddress(S.AddressV))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for static sampler AddressV");
+
+ if (!dxbc::isValidAddress(S.AddressW))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for static sampler AddressW");
+
+ if (!dxbc::isValidComparisonFunc(S.ComparisonFunc))
+ return createStringError(
+ std::errc::invalid_argument,
+ "Invalid value for static sampler ComparisonFunc");
+
+ if (!dxbc::isValidBorderColor(S.BorderColor))
+ return createStringError(std::errc::invalid_argument,
+ "Invalid value for static sampler BorderColor");
+
+ if (!dxbc::isValidShaderVisibility(S.ShaderVisibility))
+ return createStringError(
+ std::errc::invalid_argument,
+ "Invalid value for static sampler ShaderVisibility");
+
StaticSamplerYamlDesc NewS;
- NewS.Filter = S.Filter;
- NewS.AddressU = S.AddressU;
- NewS.AddressV = S.AddressV;
- NewS.AddressW = S.AddressW;
+ NewS.Filter = dxbc::SamplerFilter(S.Filter);
+ NewS.AddressU = dxbc::TextureAddressMode(S.AddressU);
+ NewS.AddressV = dxbc::TextureAddressMode(S.AddressV);
+ NewS.AddressW = dxbc::TextureAddressMode(S.AddressW);
NewS.MipLODBias = S.MipLODBias;
NewS.MaxAnisotropy = S.MaxAnisotropy;
- NewS.ComparisonFunc = S.ComparisonFunc;
- NewS.BorderColor = S.BorderColor;
+ NewS.ComparisonFunc = dxbc::ComparisonFunc(S.ComparisonFunc);
+ NewS.BorderColor = dxbc::StaticBorderColor(S.BorderColor);
NewS.MinLOD = S.MinLOD;
NewS.MaxLOD = S.MaxLOD;
NewS.ShaderRegister = S.ShaderRegister;
NewS.RegisterSpace = S.RegisterSpace;
- NewS.ShaderVisibility = S.ShaderVisibility;
+ NewS.ShaderVisibility = dxbc::ShaderVisibility(S.ShaderVisibility);
RootSigDesc.StaticSamplers.push_back(NewS);
}
@@ -425,21 +457,21 @@ void MappingContextTraits<DXContainerYAML::RootParameterLocationYaml,
IO.mapRequired("ShaderVisibility", L.Header.Visibility);
switch (L.Header.Type) {
- case llvm::to_underlying(dxbc::RootParameterType::Constants32Bit): {
+ case dxbc::RootParameterType::Constants32Bit: {
DXContainerYAML::RootConstantsYaml &Constants =
S.Parameters.getOrInsertConstants(L);
IO.mapRequired("Constants", Constants);
break;
}
- case llvm::to_underlying(dxbc::RootParameterType::CBV):
- case llvm::to_underlying(dxbc::RootParameterType::SRV):
- case llvm::to_underlying(dxbc::RootParameterType::UAV): {
+ case dxbc::RootParameterType::CBV:
+ case dxbc::RootParameterType::SRV:
+ case dxbc::RootParameterType::UAV: {
DXContainerYAML::RootDescriptorYaml &Descriptor =
S.Parameters.getOrInsertDescriptor(L);
IO.mapRequired("Descriptor", Descriptor);
break;
}
- case llvm::to_underlying(dxbc::RootParameterType::DescriptorTable): {
+ case dxbc::RootParameterType::DescriptorTable: {
DXContainerYAML::DescriptorTableYaml &Table =
S.Parameters.getOrInsertTable(L);
IO.mapRequired("Table", Table);
@@ -585,6 +617,55 @@ void ScalarEnumerationTraits<dxbc::SigComponentType>::enumeration(
IO.enumCase(Value, E.Name.str().c_str(), E.Value);
}
+void ScalarEnumerationTraits<dxbc::RootParameterType>::enumeration(
+ IO &IO, dxbc::RootParameterType &Value) {
+ for (const auto &E : dxbc::getRootParameterTypes())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxil::ResourceClass>::enumeration(
+ IO &IO, dxil::ResourceClass &Value) {
+ const EnumEntry<dxil::ResourceClass> ResourceClasses[] = {
+ {"CBuffer", dxil::ResourceClass::CBuffer},
+ {"SRV", dxil::ResourceClass::SRV},
+ {"UAV", dxil::ResourceClass::UAV},
+ {"Sampler", dxil::ResourceClass::Sampler},
+ };
+
+ for (const auto &E : ResourceClasses)
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::SamplerFilter>::enumeration(
+ IO &IO, dxbc::SamplerFilter &Value) {
+ for (const auto &E : dxbc::getSamplerFilters())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::StaticBorderColor>::enumeration(
+ IO &IO, dxbc::StaticBorderColor &Value) {
+ for (const auto &E : dxbc::getStaticBorderColors())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::TextureAddressMode>::enumeration(
+ IO &IO, dxbc::TextureAddressMode &Value) {
+ for (const auto &E : dxbc::getTextureAddressModes())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::ShaderVisibility>::enumeration(
+ IO &IO, dxbc::ShaderVisibility &Value) {
+ for (const auto &E : dxbc::getShaderVisibility())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::ComparisonFunc>::enumeration(
+ IO &IO, dxbc::ComparisonFunc &Value) {
+ for (const auto &E : dxbc::getComparisonFuncs())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
} // namespace yaml
void DXContainerYAML::PSVInfo::mapInfoForVersion(yaml::IO &IO) {
diff --git a/llvm/lib/ObjectYAML/GOFFEmitter.cpp b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
index 7e94ac6..c26893c 100644
--- a/llvm/lib/ObjectYAML/GOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
@@ -38,8 +38,8 @@ template <typename ValueType> struct BinaryBeImpl {
template <typename ValueType>
raw_ostream &operator<<(raw_ostream &OS, const BinaryBeImpl<ValueType> &BBE) {
char Buffer[sizeof(BBE.Value)];
- support::endian::write<ValueType, llvm::endianness::big, support::unaligned>(
- Buffer, BBE.Value);
+ support::endian::write<ValueType, support::unaligned>(Buffer, BBE.Value,
+ llvm::endianness::big);
OS.write(Buffer, sizeof(BBE.Value));
return OS;
}
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 60c1393..ac7513e 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -47,6 +47,24 @@ bool FunctionSamples::ProfileIsPreInlined = false;
bool FunctionSamples::UseMD5 = false;
bool FunctionSamples::HasUniqSuffix = true;
bool FunctionSamples::ProfileIsFS = false;
+
+std::error_code
+serializeTypeMap(const TypeCountMap &Map,
+ const MapVector<FunctionId, uint32_t> &NameTable,
+ raw_ostream &OS) {
+ encodeULEB128(Map.size(), OS);
+ for (const auto &[TypeName, SampleCount] : Map) {
+ if (auto NameIndexIter = NameTable.find(TypeName);
+ NameIndexIter != NameTable.end()) {
+ encodeULEB128(NameIndexIter->second, OS);
+ } else {
+ // If the type is not in the name table, we cannot serialize it.
+ return sampleprof_error::truncated_name_table;
+ }
+ encodeULEB128(SampleCount, OS);
+ }
+ return sampleprof_error::success;
+}
} // namespace sampleprof
} // namespace llvm
@@ -178,6 +196,17 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}
+static void printTypeCountMap(raw_ostream &OS, LineLocation Loc,
+ const TypeCountMap &TypeCountMap) {
+ if (TypeCountMap.empty()) {
+ return;
+ }
+ OS << Loc << ": vtables: ";
+ for (const auto &[Type, Count] : TypeCountMap)
+ OS << Type << ":" << Count << " ";
+ OS << "\n";
+}
+
/// Print the samples collected for a function on stream \p OS.
void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
if (getFunctionHash())
@@ -192,7 +221,13 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples);
for (const auto &SI : SortedBodySamples.get()) {
OS.indent(Indent + 2);
+ const auto &Loc = SI->first;
OS << SI->first << ": " << SI->second;
+ if (const TypeCountMap *TypeCountMap =
+ this->findCallsiteTypeSamplesAt(Loc)) {
+ OS.indent(Indent + 2);
+ printTypeCountMap(OS, Loc, *TypeCountMap);
+ }
}
OS.indent(Indent);
OS << "}\n";
@@ -214,6 +249,11 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
OS << Loc << ": inlined callee: " << FuncSample.getFunction() << ": ";
FuncSample.print(OS, Indent + 4);
}
+ auto TypeSamplesIter = VirtualCallsiteTypeCounts.find(Loc);
+ if (TypeSamplesIter != VirtualCallsiteTypeCounts.end()) {
+ OS.indent(Indent + 2);
+ printTypeCountMap(OS, Loc, TypeSamplesIter->second);
+ }
}
OS.indent(Indent);
OS << "}\n";
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 12769a3..81ae792 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -197,8 +197,37 @@ enum class LineType {
CallSiteProfile,
BodyProfile,
Metadata,
+ VirtualCallTypeProfile,
};
+// Parse `Input` as a white-space separated list of `vtable:count` pairs. An
+// example input line is `_ZTVbar:1471 _ZTVfoo:630`.
+static bool parseTypeCountMap(StringRef Input,
+ DenseMap<StringRef, uint64_t> &TypeCountMap) {
+ for (size_t Index = Input.find_first_not_of(' '); Index != StringRef::npos;) {
+ size_t ColonIndex = Input.find(':', Index);
+ if (ColonIndex == StringRef::npos)
+ return false; // No colon found, invalid format.
+ StringRef TypeName = Input.substr(Index, ColonIndex - Index);
+ // CountIndex is the start index of count.
+ size_t CountStartIndex = ColonIndex + 1;
+ // NextIndex is the start index after the 'target:count' pair.
+ size_t NextIndex = Input.find_first_of(' ', CountStartIndex);
+ uint64_t Count;
+ if (Input.substr(CountStartIndex, NextIndex - CountStartIndex)
+ .getAsInteger(10, Count))
+ return false; // Invalid count.
+ // Error on duplicated type names in one line of input.
+ auto [Iter, Inserted] = TypeCountMap.insert({TypeName, Count});
+ if (!Inserted)
+ return false;
+ Index = (NextIndex == StringRef::npos)
+ ? StringRef::npos
+ : Input.find_first_not_of(' ', NextIndex);
+ }
+ return true;
+}
+
/// Parse \p Input as line sample.
///
/// \param Input input line.
@@ -215,6 +244,7 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
uint64_t &NumSamples, uint32_t &LineOffset,
uint32_t &Discriminator, StringRef &CalleeName,
DenseMap<StringRef, uint64_t> &TargetCountMap,
+ DenseMap<StringRef, uint64_t> &TypeCountMap,
uint64_t &FunctionHash, uint32_t &Attributes,
bool &IsFlat) {
for (Depth = 0; Input[Depth] == ' '; Depth++)
@@ -306,6 +336,10 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
// Change n3 to the next blank space after colon + integer pair.
n3 = n4;
}
+ } else if (Rest.starts_with(kVTableProfPrefix)) {
+ LineTy = LineType::VirtualCallTypeProfile;
+ return parseTypeCountMap(Rest.substr(strlen(kVTableProfPrefix)),
+ TypeCountMap);
} else {
LineTy = LineType::CallSiteProfile;
size_t n3 = Rest.find_last_of(':');
@@ -374,19 +408,27 @@ std::error_code SampleProfileReaderText::readImpl() {
uint64_t NumSamples;
StringRef FName;
DenseMap<StringRef, uint64_t> TargetCountMap;
+ DenseMap<StringRef, uint64_t> TypeCountMap;
uint32_t Depth, LineOffset, Discriminator;
LineType LineTy = LineType::BodyProfile;
uint64_t FunctionHash = 0;
uint32_t Attributes = 0;
bool IsFlat = false;
+ // TODO: Update ParseLine to return an error code instead of a bool and
+ // report it.
if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
- Discriminator, FName, TargetCountMap, FunctionHash,
- Attributes, IsFlat)) {
+ Discriminator, FName, TargetCountMap, TypeCountMap,
+ FunctionHash, Attributes, IsFlat)) {
switch (LineTy) {
case LineType::Metadata:
reportError(LineIt.line_number(),
"Cannot parse metadata: " + *LineIt);
break;
+ case LineType::VirtualCallTypeProfile:
+ reportError(LineIt.line_number(),
+ "Expected 'vtables [mangled_vtable:NUM]+', found " +
+ *LineIt);
+ break;
default:
reportError(LineIt.line_number(),
"Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
@@ -417,6 +459,14 @@ std::error_code SampleProfileReaderText::readImpl() {
DepthMetadata = 0;
break;
}
+
+ case LineType::VirtualCallTypeProfile: {
+ mergeSampleProfErrors(
+ Result, InlineStack.back()->addCallsiteVTableTypeProfAt(
+ LineLocation(LineOffset, Discriminator), TypeCountMap));
+ break;
+ }
+
case LineType::BodyProfile: {
FunctionSamples &FProfile = *InlineStack.back();
for (const auto &name_count : TargetCountMap) {
@@ -599,6 +649,67 @@ SampleProfileReaderBinary::readSampleContextFromTable() {
}
std::error_code
+SampleProfileReaderBinary::readVTableTypeCountMap(TypeCountMap &M) {
+ auto NumVTableTypes = readNumber<uint32_t>();
+ if (std::error_code EC = NumVTableTypes.getError())
+ return EC;
+
+ for (uint32_t I = 0; I < *NumVTableTypes; ++I) {
+ auto VTableType(readStringFromTable());
+ if (std::error_code EC = VTableType.getError())
+ return EC;
+
+ auto VTableSamples = readNumber<uint64_t>();
+ if (std::error_code EC = VTableSamples.getError())
+ return EC;
+ // The source profile should not have duplicate vtable records at the same
+ // location. In case duplicate vtables are found, reader can emit a warning
+ // but continue processing the profile.
+ if (!M.insert(std::make_pair(*VTableType, *VTableSamples)).second) {
+ Ctx.diagnose(DiagnosticInfoSampleProfile(
+ Buffer->getBufferIdentifier(), 0,
+ "Duplicate vtable type " + VTableType->str() +
+ " at the same location. Additional counters will be ignored.",
+ DS_Warning));
+ continue;
+ }
+ }
+ return sampleprof_error::success;
+}
+
+std::error_code
+SampleProfileReaderBinary::readCallsiteVTableProf(FunctionSamples &FProfile) {
+ assert(ReadVTableProf &&
+ "Cannot read vtable profiles if ReadVTableProf is false");
+
+ // Read the vtable type profile for the callsite.
+ auto NumCallsites = readNumber<uint32_t>();
+ if (std::error_code EC = NumCallsites.getError())
+ return EC;
+
+ for (uint32_t I = 0; I < *NumCallsites; ++I) {
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
+
+ if (!isOffsetLegal(*LineOffset))
+ return sampleprof_error::illegal_line_offset;
+
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
+
+ // Here we handle FS discriminators:
+ const uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
+
+ if (std::error_code EC = readVTableTypeCountMap(FProfile.getTypeSamplesAt(
+ LineLocation(*LineOffset, DiscriminatorVal))))
+ return EC;
+ }
+ return sampleprof_error::success;
+}
+
+std::error_code
SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
auto NumSamples = readNumber<uint64_t>();
if (std::error_code EC = NumSamples.getError())
@@ -678,6 +789,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
return EC;
}
+ if (ReadVTableProf)
+ return readCallsiteVTableProf(FProfile);
+
return sampleprof_error::success;
}
@@ -740,6 +854,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagHasVTableTypeProf))
+ ReadVTableProf = true;
break;
case SecNameTable: {
bool FixedLengthMD5 =
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 9173a0f..e5f3134 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -41,6 +41,11 @@
using namespace llvm;
using namespace sampleprof;
+// To begin with, make this option off by default.
+static cl::opt<bool> ExtBinaryWriteVTableTypeProf(
+ "extbinary-write-vtable-type-prof", cl::init(false), cl::Hidden,
+ cl::desc("Write vtable type profile in ext-binary sample profile writer"));
+
namespace llvm {
namespace support {
namespace endian {
@@ -435,6 +440,9 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
+ if (Type == SecProfSummary && ExtBinaryWriteVTableTypeProf)
+ addSectionFlag(SecProfSummary,
+ SecProfSummaryFlags::SecFlagHasVTableTypeProf);
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
switch (Type) {
@@ -478,6 +486,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
return sampleprof_error::success;
}
+SampleProfileWriterExtBinary::SampleProfileWriterExtBinary(
+ std::unique_ptr<raw_ostream> &OS)
+ : SampleProfileWriterExtBinaryBase(OS) {
+ WriteVTableProf = ExtBinaryWriteVTableTypeProf;
+}
+
std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
const SampleProfileMap &ProfileMap) {
// The const indices passed to writeOneSection below are specifying the
@@ -587,6 +601,19 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
OS << " " << J.first << ":" << J.second;
OS << "\n";
LineCount++;
+
+ if (const TypeCountMap *Map = S.findCallsiteTypeSamplesAt(Loc);
+ Map && !Map->empty()) {
+ OS.indent(Indent + 1);
+ Loc.print(OS);
+ OS << ": ";
+ OS << kVTableProfPrefix;
+ for (const auto [TypeName, Count] : *Map) {
+ OS << TypeName << ":" << Count << " ";
+ }
+ OS << "\n";
+ LineCount++;
+ }
}
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
@@ -603,7 +630,21 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
if (std::error_code EC = writeSample(CalleeSamples))
return EC;
}
+
+ if (const TypeCountMap *Map = S.findCallsiteTypeSamplesAt(Loc);
+ Map && !Map->empty()) {
+ OS.indent(Indent);
+ Loc.print(OS);
+ OS << ": ";
+ OS << kVTableProfPrefix;
+ for (const auto [TypeId, Count] : *Map) {
+ OS << TypeId << ":" << Count << " ";
+ }
+ OS << "\n";
+ LineCount++;
+ }
}
+
Indent -= 1;
if (FunctionSamples::ProfileIsProbeBased) {
@@ -663,6 +704,17 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
addName(CalleeSamples.getFunction());
addNames(CalleeSamples);
}
+
+ if (!WriteVTableProf)
+ return;
+ // Add all the vtable names to NameTable.
+ for (const auto &VTableAccessCountMap :
+ llvm::make_second_range(S.getCallsiteTypeCounts())) {
+ // Add type name to NameTable.
+ for (const auto Type : llvm::make_first_range(VTableAccessCountMap)) {
+ addName(Type);
+ }
+ }
}
void SampleProfileWriterExtBinaryBase::addContext(
@@ -801,6 +853,22 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterBinary::writeCallsiteVTableProf(
+ const CallsiteTypeMap &CallsiteTypeMap, raw_ostream &OS) {
+ assert(WriteVTableProf &&
+ "writeCallsiteVTableProf should not be called if WriteVTableProf is "
+ "false");
+
+ encodeULEB128(CallsiteTypeMap.size(), OS);
+ for (const auto &[Loc, TypeMap] : CallsiteTypeMap) {
+ Loc.serialize(OS);
+ if (std::error_code EC = serializeTypeMap(TypeMap, getNameTable(), OS))
+ return EC;
+ }
+
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileWriterBinary::writeSummary() {
auto &OS = *OutputStream;
encodeULEB128(Summary->getTotalCount(), OS);
@@ -838,14 +906,16 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
for (const auto &J : S.getCallsiteSamples())
NumCallsites += J.second.size();
encodeULEB128(NumCallsites, OS);
- for (const auto &[Loc, CalleeFunctionSampleMap] : S.getCallsiteSamples())
- for (const auto &FunctionSample :
- llvm::make_second_range(CalleeFunctionSampleMap)) {
- Loc.serialize(OS);
- if (std::error_code EC = writeBody(FunctionSample))
+ for (const auto &J : S.getCallsiteSamples())
+ for (const auto &FS : J.second) {
+ J.first.serialize(OS);
+ if (std::error_code EC = writeBody(FS.second))
return EC;
}
+ if (WriteVTableProf)
+ return writeCallsiteVTableProf(S.getCallsiteTypeCounts(), OS);
+
return sampleprof_error::success;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 484bc2a..cb831963 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -630,7 +630,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
const MCInstrDesc &MCID = TII->get(Opc);
// Create a dummy virtual register for the SUBS def.
Register DestReg =
- MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
+ MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI));
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
.addReg(DestReg, RegState::Define | RegState::Dead)
@@ -639,7 +639,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
.addImm(0);
// SUBS uses the GPR*sp register classes.
MRI->constrainRegClass(HeadCond[2].getReg(),
- TII->getRegClass(MCID, 1, TRI, *MF));
+ TII->getRegClass(MCID, 1, TRI));
}
Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
@@ -686,10 +686,10 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
const MCInstrDesc &MCID = TII->get(Opc);
MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
- TII->getRegClass(MCID, 0, TRI, *MF));
+ TII->getRegClass(MCID, 0, TRI));
if (CmpMI->getOperand(FirstOp + 1).isReg())
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
- TII->getRegClass(MCID, 1, TRI, *MF));
+ TII->getRegClass(MCID, 1, TRI));
MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
.add(CmpMI->getOperand(FirstOp)); // Register Rn
if (isZBranch)
diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 987dfbc..75361f5 100644
--- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -115,7 +115,6 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
- const MachineFunction &MF = *MBB.getParent();
for (MachineInstr &MI : MBB) {
if (usesFrameIndex(MI)) {
// We need to skip this instruction because while it appears to have a
@@ -157,7 +156,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
- const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
unsigned NewReg;
if (RC == nullptr) {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6fdc981..235dbc4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -71,6 +71,9 @@ public:
template <signed Low, signed High, signed Scale>
bool SelectRDVLImm(SDValue N, SDValue &Imm);
+ template <signed Low, signed High>
+ bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
+
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
@@ -574,7 +577,7 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
// We need to make sure that this one operand does not end up in XZR, thus
// require the address to be in a PointerRegClass register.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass();
SDLoc dl(Op);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
SDValue NewOp =
@@ -938,6 +941,21 @@ bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
return false;
}
+// Returns a suitable RDSVL multiplier from a left shift.
+template <signed Low, signed High>
+bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ int64_t MulImm = 1 << cast<ConstantSDNode>(N)->getSExtValue();
+ if (MulImm >= Low && MulImm <= High) {
+ Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a194147..c9a756d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6392,25 +6392,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sme_cntsb:
- return DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
- DAG.getConstant(1, DL, MVT::i32));
- case Intrinsic::aarch64_sme_cntsh: {
- SDValue One = DAG.getConstant(1, DL, MVT::i32);
- SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), One);
- return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, One);
- }
- case Intrinsic::aarch64_sme_cntsw: {
- SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
- DAG.getConstant(1, DL, MVT::i32));
- return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
- DAG.getConstant(2, DL, MVT::i32));
- }
case Intrinsic::aarch64_sme_cntsd: {
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
DAG.getConstant(1, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
- DAG.getConstant(3, DL, MVT::i32));
+ DAG.getConstant(3, DL, MVT::i32), SDNodeFlags::Exact);
}
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
@@ -9312,6 +9298,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
std::optional<unsigned> ZAMarkerNode;
bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
+
if (UseNewSMEABILowering) {
if (CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bf3d47a..9a7512b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -10951,9 +10951,8 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
MRI.getRegClass(NewMI->getOperand(0).getReg()));
NewMI->getOperand(I).setReg(Result);
} else if (I == ReplaceOprNum) {
- MRI.constrainRegClass(
- ReplaceReg,
- TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
+ MRI.constrainRegClass(ReplaceReg,
+ TII->getRegClass(NewMI->getDesc(), I, TRI));
NewMI->getOperand(I).setReg(ReplaceReg);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f0020a9..3fcafc6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5409,6 +5409,11 @@ defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
let Predicates = [HasNEON, HasFPRCVT] in {
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
+
+ def : Pat<(v1f64 (extract_subvector (v2f64 (sint_to_fp (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
+ (SCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
+ def : Pat<(v1f64 (extract_subvector (v2f64 (uint_to_fp (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
+ (UCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
}
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index fd4ef2a..04e76c7 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -594,19 +594,18 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
// NewDstReg = Opcode.second NewTmpReg Imm1
// Determine register classes for destinations and register operands
- MachineFunction *MF = MI.getMF();
const TargetRegisterClass *FirstInstrDstRC =
- TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
+ TII->getRegClass(TII->get(Opcode.first), 0, TRI);
const TargetRegisterClass *FirstInstrOperandRC =
- TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
+ TII->getRegClass(TII->get(Opcode.first), 1, TRI);
const TargetRegisterClass *SecondInstrDstRC =
(Opcode.first == Opcode.second)
? FirstInstrDstRC
- : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
+ : TII->getRegClass(TII->get(Opcode.second), 0, TRI);
const TargetRegisterClass *SecondInstrOperandRC =
(Opcode.first == Opcode.second)
? FirstInstrOperandRC
- : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
+ : TII->getRegClass(TII->get(Opcode.second), 1, TRI);
// Get old registers destinations and new register destinations
Register DstReg = MI.getOperand(0).getReg();
@@ -785,14 +784,14 @@ bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
}
const TargetRegisterClass *DstRC64 =
- TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
+ TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI);
const TargetRegisterClass *DstRC32 =
TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
"sub_32 subregister class");
const TargetRegisterClass *SrcRC64 =
- TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
+ TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI);
const TargetRegisterClass *SrcRC32 =
TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 77dfab8..2b0c8ad 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -610,8 +610,7 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
}
const TargetRegisterClass *
-AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
+AArch64RegisterInfo::getPointerRegClass(unsigned Kind) const {
return &AArch64::GPR64spRegClass;
}
@@ -893,7 +892,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
- MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
+ MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 1ed8e95..72a7676 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -102,8 +102,7 @@ public:
bool isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 601dc34..2d2d81a 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -134,11 +134,16 @@ def : Pat<(AArch64_sme_state_alloc), (SMEStateAllocPseudo)>;
def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;
+def sme_rdsvl_shl_imm : ComplexPattern<i64, 1, "SelectRDSVLShiftImm<1, 31>">;
+
let Predicates = [HasSMEandIsNonStreamingSafe] in {
def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
+def : Pat<(i64 (shl (AArch64rdsvl (i32 1)), (sme_rdsvl_shl_imm i64:$imm))),
+ (RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm))>;
+
def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 92321a7..8c4b4f6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2103,15 +2103,15 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
}
static std::optional<Instruction *>
-instCombineSMECntsElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts,
- const AArch64Subtarget *ST) {
+instCombineSMECntsd(InstCombiner &IC, IntrinsicInst &II,
+ const AArch64Subtarget *ST) {
if (!ST->isStreaming())
return std::nullopt;
- // In streaming-mode, aarch64_sme_cnts is equivalent to aarch64_sve_cnt
+ // In streaming-mode, aarch64_sme_cntds is equivalent to aarch64_sve_cntd
// with SVEPredPattern::all
- Value *Cnt = IC.Builder.CreateElementCount(
- II.getType(), ElementCount::getScalable(NumElts));
+ Value *Cnt =
+ IC.Builder.CreateElementCount(II.getType(), ElementCount::getScalable(2));
Cnt->takeName(&II);
return IC.replaceInstUsesWith(II, Cnt);
}
@@ -2747,6 +2747,15 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombineWhilelo(InstCombiner &IC,
+ IntrinsicInst &II) {
+ return IC.replaceInstUsesWith(
+ II,
+ IC.Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
+ {II.getType(), II.getOperand(0)->getType()},
+ {II.getOperand(0), II.getOperand(1)}));
+}
+
static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
IntrinsicInst &II) {
if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
@@ -2826,13 +2835,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_cntb:
return instCombineSVECntElts(IC, II, 16);
case Intrinsic::aarch64_sme_cntsd:
- return instCombineSMECntsElts(IC, II, 2, ST);
- case Intrinsic::aarch64_sme_cntsw:
- return instCombineSMECntsElts(IC, II, 4, ST);
- case Intrinsic::aarch64_sme_cntsh:
- return instCombineSMECntsElts(IC, II, 8, ST);
- case Intrinsic::aarch64_sme_cntsb:
- return instCombineSMECntsElts(IC, II, 16, ST);
+ return instCombineSMECntsd(IC, II, ST);
case Intrinsic::aarch64_sve_ptest_any:
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
@@ -2889,6 +2892,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEDupqLane(IC, II);
case Intrinsic::aarch64_sve_insr:
return instCombineSVEInsr(IC, II);
+ case Intrinsic::aarch64_sve_whilelo:
+ return instCombineWhilelo(IC, II);
case Intrinsic::aarch64_sve_ptrue:
return instCombinePTrue(IC, II);
case Intrinsic::aarch64_sve_uxtb:
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index c39a5cc..cced0fa 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -110,6 +110,71 @@ struct PhysRegSave {
Register X0Save = AArch64::NoRegister;
};
+/// Contains the needed ZA state (and live registers) at an instruction. That is
+/// the state ZA must be in _before_ "InsertPt".
+struct InstInfo {
+ ZAState NeededState{ZAState::ANY};
+ MachineBasicBlock::iterator InsertPt;
+ LiveRegs PhysLiveRegs = LiveRegs::None;
+};
+
+/// Contains the needed ZA state for each instruction in a block. Instructions
+/// that do not require a ZA state are not recorded.
+struct BlockInfo {
+ ZAState FixedEntryState{ZAState::ANY};
+ SmallVector<InstInfo> Insts;
+ LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
+ LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
+};
+
+/// Contains the needed ZA state information for all blocks within a function.
+struct FunctionInfo {
+ SmallVector<BlockInfo> Blocks;
+ std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
+ LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
+};
+
+/// State/helpers that is only needed when emitting code to handle
+/// saving/restoring ZA.
+class EmitContext {
+public:
+ EmitContext() = default;
+
+ /// Get or create a TPIDR2 block in \p MF.
+ int getTPIDR2Block(MachineFunction &MF) {
+ if (TPIDR2BlockFI)
+ return *TPIDR2BlockFI;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false);
+ return *TPIDR2BlockFI;
+ }
+
+ /// Get or create agnostic ZA buffer pointer in \p MF.
+ Register getAgnosticZABufferPtr(MachineFunction &MF) {
+ if (AgnosticZABufferPtr != AArch64::NoRegister)
+ return AgnosticZABufferPtr;
+ Register BufferPtr =
+ MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
+ AgnosticZABufferPtr =
+ BufferPtr != AArch64::NoRegister
+ ? BufferPtr
+ : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ return AgnosticZABufferPtr;
+ }
+
+ /// Returns true if the function must allocate a ZA save buffer on entry. This
+ /// will be the case if, at any point in the function, a ZA save was emitted.
+ bool needsSaveBuffer() const {
+ assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
+ "Cannot have both a TPIDR2 block and agnostic ZA buffer");
+ return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
+ }
+
+private:
+ std::optional<int> TPIDR2BlockFI;
+ Register AgnosticZABufferPtr = AArch64::NoRegister;
+};
+
static bool isLegalEdgeBundleZAState(ZAState State) {
switch (State) {
case ZAState::ACTIVE:
@@ -119,9 +184,6 @@ static bool isLegalEdgeBundleZAState(ZAState State) {
return false;
}
}
-struct TPIDR2State {
- int FrameIndex = -1;
-};
StringRef getZAStateString(ZAState State) {
#define MAKE_CASE(V) \
@@ -192,25 +254,28 @@ struct MachineSMEABI : public MachineFunctionPass {
/// Collects the needed ZA state (and live registers) before each instruction
/// within the machine function.
- void collectNeededZAStates(SMEAttrs);
+ FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
- void assignBundleZAStates();
+ SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
+ const FunctionInfo &FnInfo);
/// Inserts code to handle changes between ZA states within the function.
/// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
- void insertStateChanges();
+ void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
+ const EdgeBundles &Bundles,
+ ArrayRef<ZAState> BundleStates);
// Emission routines for private and shared ZA functions (using lazy saves).
void emitNewZAPrologue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
- void emitRestoreLazySave(MachineBasicBlock &MBB,
+ void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs);
- void emitSetupLazySave(MachineBasicBlock &MBB,
+ void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
- void emitAllocateLazySaveBuffer(MachineBasicBlock &MBB,
+ void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
bool ClearTPIDR2);
@@ -222,78 +287,49 @@ struct MachineSMEABI : public MachineFunctionPass {
// Emit a "full" ZA save or restore. It is "full" in the sense that this
// function will emit a call to __arm_sme_save or __arm_sme_restore, which
// handles saving and restoring both ZA and ZT0.
- void emitFullZASaveRestore(MachineBasicBlock &MBB,
+ void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs, bool IsSave);
- void emitAllocateFullZASaveBuffer(MachineBasicBlock &MBB,
+ void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs);
- void emitStateChange(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- ZAState From, ZAState To, LiveRegs PhysLiveRegs);
+ void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, ZAState From,
+ ZAState To, LiveRegs PhysLiveRegs);
// Helpers for switching between lazy/full ZA save/restore routines.
- void emitZASave(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- LiveRegs PhysLiveRegs) {
+ void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
- return emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/true);
- return emitSetupLazySave(MBB, MBBI);
+ return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
+ /*IsSave=*/true);
+ return emitSetupLazySave(Context, MBB, MBBI);
}
- void emitZARestore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- LiveRegs PhysLiveRegs) {
+ void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
- return emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/false);
- return emitRestoreLazySave(MBB, MBBI, PhysLiveRegs);
+ return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
+ /*IsSave=*/false);
+ return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
}
- void emitAllocateZASaveBuffer(MachineBasicBlock &MBB,
+ void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs) {
if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
- return emitAllocateFullZASaveBuffer(MBB, MBBI, PhysLiveRegs);
- return emitAllocateLazySaveBuffer(MBB, MBBI);
+ return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
+ return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
}
/// Save live physical registers to virtual registers.
PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL);
/// Restore physical registers from a save of their previous values.
- void restorePhyRegSave(PhysRegSave const &RegSave, MachineBasicBlock &MBB,
+ void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL);
- /// Get or create a TPIDR2 block in this function.
- TPIDR2State getTPIDR2Block();
-
- Register getAgnosticZABufferPtr();
-
private:
- /// Contains the needed ZA state (and live registers) at an instruction.
- struct InstInfo {
- ZAState NeededState{ZAState::ANY};
- MachineBasicBlock::iterator InsertPt;
- LiveRegs PhysLiveRegs = LiveRegs::None;
- };
-
- /// Contains the needed ZA state for each instruction in a block.
- /// Instructions that do not require a ZA state are not recorded.
- struct BlockInfo {
- ZAState FixedEntryState{ZAState::ANY};
- SmallVector<InstInfo> Insts;
- LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
- LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
- };
-
- // All pass state that must be cleared between functions.
- struct PassState {
- SmallVector<BlockInfo> Blocks;
- SmallVector<ZAState> BundleStates;
- std::optional<TPIDR2State> TPIDR2Block;
- std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
- Register AgnosticZABufferPtr = AArch64::NoRegister;
- LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
- } State;
-
MachineFunction *MF = nullptr;
- EdgeBundles *Bundles = nullptr;
const AArch64Subtarget *Subtarget = nullptr;
const AArch64RegisterInfo *TRI = nullptr;
const AArch64FunctionInfo *AFI = nullptr;
@@ -301,14 +337,18 @@ private:
MachineRegisterInfo *MRI = nullptr;
};
-void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
+FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
SMEFnAttrs.hasZAState()) &&
"Expected function to have ZA/ZT0 state!");
- State.Blocks.resize(MF->getNumBlockIDs());
+ SmallVector<BlockInfo> Blocks(MF->getNumBlockIDs());
+ LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
+ std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
+
for (MachineBasicBlock &MBB : *MF) {
- BlockInfo &Block = State.Blocks[MBB.getNumber()];
+ BlockInfo &Block = Blocks[MBB.getNumber()];
+
if (MBB.isEntryBlock()) {
// Entry block:
Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface()
@@ -347,8 +387,8 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
// allocation -- which is a safe point for this pass to insert any TPIDR2
// block setup.
if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
- State.AfterSMEProloguePt = MBBI;
- State.PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
+ AfterSMEProloguePt = MBBI;
+ PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
}
// Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
auto [NeededState, InsertPt] = getZAStateBeforeInst(
@@ -368,11 +408,18 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
// Reverse vector (as we had to iterate backwards for liveness).
std::reverse(Block.Insts.begin(), Block.Insts.end());
}
+
+ return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
+ PhysLiveRegsAfterSMEPrologue};
}
-void MachineSMEABI::assignBundleZAStates() {
- State.BundleStates.resize(Bundles->getNumBundles());
- for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) {
+/// Assigns each edge bundle a ZA state based on the needed states of blocks
+/// that have incoming or outgoing edges in that bundle.
+SmallVector<ZAState>
+MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
+ const FunctionInfo &FnInfo) {
+ SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
+ for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
// Attempt to assign a ZA state for this bundle that minimizes state
@@ -381,16 +428,16 @@ void MachineSMEABI::assignBundleZAStates() {
// TODO: We should propagate desired incoming/outgoing states through blocks
// that have the "ANY" state first to make better global decisions.
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
- for (unsigned BlockID : Bundles->getBlocks(I)) {
+ for (unsigned BlockID : Bundles.getBlocks(I)) {
LLVM_DEBUG(dbgs() << "- bb." << BlockID);
- const BlockInfo &Block = State.Blocks[BlockID];
+ const BlockInfo &Block = FnInfo.Blocks[BlockID];
if (Block.Insts.empty()) {
LLVM_DEBUG(dbgs() << " (no state preference)\n");
continue;
}
- bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I;
- bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I;
+ bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
+ bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;
ZAState DesiredIncomingState = Block.Insts.front().NeededState;
if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
@@ -423,15 +470,20 @@ void MachineSMEABI::assignBundleZAStates() {
dbgs() << "\n\n";
});
- State.BundleStates[I] = BundleState;
+ BundleStates[I] = BundleState;
}
+
+ return BundleStates;
}
-void MachineSMEABI::insertStateChanges() {
+void MachineSMEABI::insertStateChanges(EmitContext &Context,
+ const FunctionInfo &FnInfo,
+ const EdgeBundles &Bundles,
+ ArrayRef<ZAState> BundleStates) {
for (MachineBasicBlock &MBB : *MF) {
- const BlockInfo &Block = State.Blocks[MBB.getNumber()];
- ZAState InState = State.BundleStates[Bundles->getBundle(MBB.getNumber(),
- /*Out=*/false)];
+ const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
+ ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
+ /*Out=*/false)];
ZAState CurrentState = Block.FixedEntryState;
if (CurrentState == ZAState::ANY)
@@ -439,8 +491,8 @@ void MachineSMEABI::insertStateChanges() {
for (auto &Inst : Block.Insts) {
if (CurrentState != Inst.NeededState)
- emitStateChange(MBB, Inst.InsertPt, CurrentState, Inst.NeededState,
- Inst.PhysLiveRegs);
+ emitStateChange(Context, MBB, Inst.InsertPt, CurrentState,
+ Inst.NeededState, Inst.PhysLiveRegs);
CurrentState = Inst.NeededState;
}
@@ -448,21 +500,13 @@ void MachineSMEABI::insertStateChanges() {
continue;
ZAState OutState =
- State.BundleStates[Bundles->getBundle(MBB.getNumber(), /*Out=*/true)];
+ BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
if (CurrentState != OutState)
- emitStateChange(MBB, MBB.getFirstTerminator(), CurrentState, OutState,
- Block.PhysLiveRegsAtExit);
+ emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState,
+ OutState, Block.PhysLiveRegsAtExit);
}
}
-TPIDR2State MachineSMEABI::getTPIDR2Block() {
- if (State.TPIDR2Block)
- return *State.TPIDR2Block;
- MachineFrameInfo &MFI = MF->getFrameInfo();
- State.TPIDR2Block = TPIDR2State{MFI.CreateStackObject(16, Align(16), false)};
- return *State.TPIDR2Block;
-}
-
static DebugLoc getDebugLoc(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
if (MBBI != MBB.end())
@@ -470,7 +514,8 @@ static DebugLoc getDebugLoc(MachineBasicBlock &MBB,
return DebugLoc();
}
-void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB,
+void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = getDebugLoc(MBB, MBBI);
@@ -478,7 +523,7 @@ void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB,
Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
- .addFrameIndex(getTPIDR2Block().FrameIndex)
+ .addFrameIndex(Context.getTPIDR2Block(*MF))
.addImm(0)
.addImm(0);
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
@@ -512,7 +557,7 @@ PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
return RegSave;
}
-void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave,
+void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) {
@@ -528,7 +573,8 @@ void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave,
.addReg(RegSave.X0Save);
}
-void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB,
+void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs) {
auto *TLI = Subtarget->getTargetLowering();
@@ -548,7 +594,7 @@ void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB,
.addImm(AArch64SysReg::TPIDR2_EL0);
// Get pointer to TPIDR2 block.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
- .addFrameIndex(getTPIDR2Block().FrameIndex)
+ .addFrameIndex(Context.getTPIDR2Block(*MF))
.addImm(0)
.addImm(0);
// (Conditionally) restore ZA state.
@@ -582,7 +628,8 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
}
void MachineSMEABI::emitAllocateLazySaveBuffer(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ EmitContext &Context, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
MachineFrameInfo &MFI = MF->getFrameInfo();
DebugLoc DL = getDebugLoc(MBB, MBBI);
Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
@@ -630,7 +677,7 @@ void MachineSMEABI::emitAllocateLazySaveBuffer(
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
.addReg(Buffer)
.addReg(SVL)
- .addFrameIndex(getTPIDR2Block().FrameIndex)
+ .addFrameIndex(Context.getTPIDR2Block(*MF))
.addImm(0);
}
}
@@ -662,18 +709,8 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
.addImm(1);
}
-Register MachineSMEABI::getAgnosticZABufferPtr() {
- if (State.AgnosticZABufferPtr != AArch64::NoRegister)
- return State.AgnosticZABufferPtr;
- Register BufferPtr = AFI->getEarlyAllocSMESaveBuffer();
- State.AgnosticZABufferPtr =
- BufferPtr != AArch64::NoRegister
- ? BufferPtr
- : MF->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
- return State.AgnosticZABufferPtr;
-}
-
-void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB,
+void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs, bool IsSave) {
auto *TLI = Subtarget->getTargetLowering();
@@ -684,7 +721,7 @@ void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB,
// Copy the buffer pointer into X0.
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
- .addReg(getAgnosticZABufferPtr());
+ .addReg(Context.getAgnosticZABufferPtr(*MF));
// Call __arm_sme_save/__arm_sme_restore.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
@@ -699,14 +736,14 @@ void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB,
}
void MachineSMEABI::emitAllocateFullZASaveBuffer(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- LiveRegs PhysLiveRegs) {
+ EmitContext &Context, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
// Buffer already allocated in SelectionDAG.
if (AFI->getEarlyAllocSMESaveBuffer())
return;
DebugLoc DL = getDebugLoc(MBB, MBBI);
- Register BufferPtr = getAgnosticZABufferPtr();
+ Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
@@ -742,11 +779,11 @@ void MachineSMEABI::emitAllocateFullZASaveBuffer(
restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
-void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
+void MachineSMEABI::emitStateChange(EmitContext &Context,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
ZAState From, ZAState To,
LiveRegs PhysLiveRegs) {
-
// ZA not used.
if (From == ZAState::ANY || To == ZAState::ANY)
return;
@@ -774,9 +811,9 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
}
if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
- emitZASave(MBB, InsertPt, PhysLiveRegs);
+ emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
- emitZARestore(MBB, InsertPt, PhysLiveRegs);
+ emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
else if (To == ZAState::OFF) {
assert(From != ZAState::CALLER_DORMANT &&
"CALLER_DORMANT to OFF should have already been handled");
@@ -807,32 +844,33 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
- // Reset pass state.
- State = PassState{};
this->MF = &MF;
- Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
TII = Subtarget->getInstrInfo();
TRI = Subtarget->getRegisterInfo();
MRI = &MF.getRegInfo();
- collectNeededZAStates(SMEFnAttrs);
- assignBundleZAStates();
- insertStateChanges();
+ const EdgeBundles &Bundles =
+ getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
+
+ FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
+ SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
+
+ EmitContext Context;
+ insertStateChanges(Context, FnInfo, Bundles, BundleStates);
- // Allocate save buffer (if needed).
- if (State.AgnosticZABufferPtr != AArch64::NoRegister || State.TPIDR2Block) {
- if (State.AfterSMEProloguePt) {
+ if (Context.needsSaveBuffer()) {
+ if (FnInfo.AfterSMEProloguePt) {
// Note: With inline stack probes the AfterSMEProloguePt may not be in the
// entry block (due to the probing loop).
- emitAllocateZASaveBuffer(*(*State.AfterSMEProloguePt)->getParent(),
- *State.AfterSMEProloguePt,
- State.PhysLiveRegsAfterSMEPrologue);
+ MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
+ emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
+ FnInfo.PhysLiveRegsAfterSMEPrologue);
} else {
MachineBasicBlock &EntryBlock = MF.front();
emitAllocateZASaveBuffer(
- EntryBlock, EntryBlock.getFirstNonPHI(),
- State.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
+ Context, EntryBlock, EntryBlock.getFirstNonPHI(),
+ FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d158f0f..dda8033 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -107,6 +107,14 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ return std::tuple(nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index e07d473..1064e57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -111,18 +111,25 @@ struct AMDGPUFunctionArgInfo {
DISPATCH_ID = 4,
FLAT_SCRATCH_INIT = 5,
LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI
- WORKGROUP_ID_X = 10,
- WORKGROUP_ID_Y = 11,
- WORKGROUP_ID_Z = 12,
+ WORKGROUP_ID_X = 10, // Also used for cluster ID X.
+ WORKGROUP_ID_Y = 11, // Also used for cluster ID Y.
+ WORKGROUP_ID_Z = 12, // Also used for cluster ID Z.
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
PRIVATE_SEGMENT_SIZE = 17,
+ CLUSTER_WORKGROUP_ID_X = 21,
+ CLUSTER_WORKGROUP_ID_Y = 22,
+ CLUSTER_WORKGROUP_ID_Z = 23,
+ CLUSTER_WORKGROUP_MAX_ID_X = 24,
+ CLUSTER_WORKGROUP_MAX_ID_Y = 25,
+ CLUSTER_WORKGROUP_MAX_ID_Z = 26,
+ CLUSTER_WORKGROUP_MAX_FLAT_ID = 27,
// VGPRS:
- WORKITEM_ID_X = 18,
- WORKITEM_ID_Y = 19,
- WORKITEM_ID_Z = 20,
+ WORKITEM_ID_X = 28,
+ WORKITEM_ID_Y = 29,
+ WORKITEM_ID_Z = 30,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f18536c..d8c4cbb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4452,6 +4452,74 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg,
}
}
+bool AMDGPULegalizerInfo::legalizeWorkGroupId(
+ MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!ST.hasClusters()) {
+ if (!loadInputValue(DstReg, B, WorkGroupIdPV))
+ return false;
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Clusters are supported. Return the global position in the grid. If clusters
+ // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID.
+
+ // WorkGroupIdXYZ = ClusterId == 0 ?
+ // ClusterIdXYZ :
+ // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const LLT S32 = LLT::scalar(32);
+ Register ClusterIdXYZ = MRI.createGenericVirtualRegister(S32);
+ Register ClusterMaxIdXYZ = MRI.createGenericVirtualRegister(S32);
+ Register ClusterWorkGroupIdXYZ = MRI.createGenericVirtualRegister(S32);
+ if (!loadInputValue(ClusterIdXYZ, B, WorkGroupIdPV) ||
+ !loadInputValue(ClusterWorkGroupIdXYZ, B, ClusterWorkGroupIdPV) ||
+ !loadInputValue(ClusterMaxIdXYZ, B, ClusterMaxIdPV))
+ return false;
+
+ auto One = B.buildConstant(S32, 1);
+ auto ClusterSizeXYZ = B.buildAdd(S32, ClusterMaxIdXYZ, One);
+ auto GlobalIdXYZ = B.buildAdd(S32, ClusterWorkGroupIdXYZ,
+ B.buildMul(S32, ClusterIdXYZ, ClusterSizeXYZ));
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+ switch (MFI->getClusterDims().getKind()) {
+ case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+ case AMDGPU::ClusterDimsAttr::Kind::VariableDims: {
+ B.buildCopy(DstReg, GlobalIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ case AMDGPU::ClusterDimsAttr::Kind::NoCluster: {
+ B.buildCopy(DstReg, ClusterIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+ using namespace AMDGPU::Hwreg;
+ unsigned ClusterIdField = HwregEncoding::encode(ID_IB_STS2, 6, 4);
+ Register ClusterId = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(ClusterId, &AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_GETREG_B32_const)
+ .addDef(ClusterId)
+ .addImm(ClusterIdField);
+ auto Zero = B.buildConstant(S32, 0);
+ auto NoClusters =
+ B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), ClusterId, Zero);
+ B.buildSelect(DstReg, NoClusters, ClusterIdXYZ, GlobalIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ llvm_unreachable("nothing should reach here");
+}
+
bool AMDGPULegalizerInfo::loadInputValue(
Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
@@ -4471,8 +4539,31 @@ bool AMDGPULegalizerInfo::loadInputValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ const ArgDescriptor ClusterWorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+ const ArgDescriptor ClusterWorkGroupIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+ const ArgDescriptor ClusterWorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+ const ArgDescriptor ClusterWorkGroupMaxIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+ const ArgDescriptor ClusterWorkGroupMaxFlatID =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+ auto LoadConstant = [&](unsigned N) {
+ B.buildConstant(DstReg, N);
+ return true;
+ };
+
if (ST.hasArchitectedSGPRs() &&
(AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+ AMDGPU::ClusterDimsAttr ClusterDims = MFI->getClusterDims();
+ bool HasFixedDims = ClusterDims.isFixedDims();
+
switch (ArgType) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Arg = &WorkGroupIDX;
@@ -4489,6 +4580,53 @@ bool AMDGPULegalizerInfo::loadInputValue(
ArgRC = &AMDGPU::SReg_32RegClass;
ArgTy = LLT::scalar(32);
break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDX;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDY;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDZ;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[0] - 1);
+ Arg = &ClusterWorkGroupMaxIDX;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[1] - 1);
+ Arg = &ClusterWorkGroupMaxIDY;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[2] - 1);
+ Arg = &ClusterWorkGroupMaxIDZ;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ Arg = &ClusterWorkGroupMaxFlatID;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
default:
break;
}
@@ -4499,10 +4637,9 @@ bool AMDGPULegalizerInfo::loadInputValue(
if (!Arg) {
if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
- // The intrinsic may appear when we have a 0 sized kernarg segment, in which
- // case the pointer argument may be missing and we use null.
- B.buildConstant(DstReg, 0);
- return true;
+ // The intrinsic may appear when we have a 0 sized kernarg segment, in
+ // which case the pointer argument may be missing and we use null.
+ return LoadConstant(0);
}
// It's undefined behavior if a function marked with the amdgpu-no-*
@@ -7415,6 +7552,22 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI,
+ MachineIRBuilder &B,
+ AMDGPU::Hwreg::Id HwReg,
+ unsigned LowBit,
+ unsigned Width) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!MRI.getRegClassOrNull(DstReg))
+ MRI.setRegClass(DstReg, &AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_GETREG_B32_const)
+ .addDef(DstReg)
+ .addImm(AMDGPU::Hwreg::HwregEncoding::encode(HwReg, LowBit, Width));
+ MI.eraseFromParent();
+ return true;
+}
+
static constexpr unsigned FPEnvModeBitField =
AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23);
@@ -7577,14 +7730,64 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2,
AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
case Intrinsic::amdgcn_workgroup_id_x:
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
- return legalizePreloadedArgIntrin(MI, MRI, B,
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_id_x:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ case Intrinsic::amdgcn_cluster_id_y:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ case Intrinsic::amdgcn_cluster_id_z:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_id_x:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
+ case Intrinsic::amdgcn_cluster_workgroup_id_y:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
+ case Intrinsic::amdgcn_cluster_workgroup_id_z:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+ return ST.hasClusters() &&
+ legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+ return ST.hasClusters() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID);
case Intrinsic::amdgcn_wave_id:
return legalizeWaveID(MI, B);
case Intrinsic::amdgcn_lds_kernel_id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1f4e02b0..cd44a9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -114,6 +114,11 @@ public:
void buildLoadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg,
const TargetRegisterClass *ArgRC, LLT ArgTy) const;
+ bool legalizeWorkGroupId(
+ MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
@@ -218,6 +223,9 @@ public:
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeConstHwRegRead(MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+ unsigned Width) const;
bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 7dbe123..ddabd25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -406,6 +406,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
SmallVector<Instruction *> &NewInsts) {
// TODO: Extracting a "multiple of X" from a GEP might be a useful generic
// helper.
+ LLVMContext &Ctx = GEP->getContext();
unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
SmallMapVector<Value *, APInt, 4> VarOffsets;
APInt ConstOffset(BW, 0);
@@ -438,27 +439,24 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
assert(CurPtr == Alloca && "GEP not based on alloca");
- unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
+ int64_t VecElemSize = DL.getTypeAllocSize(VecElemTy);
if (VarOffsets.size() > 1)
return nullptr;
APInt IndexQuot;
- APInt Rem;
- APInt::sdivrem(ConstOffset, APInt(ConstOffset.getBitWidth(), VecElemSize),
- IndexQuot, Rem);
- if (!Rem.isZero())
+ int64_t Rem;
+ APInt::sdivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
+ if (Rem != 0)
return nullptr;
if (VarOffsets.size() == 0)
- return ConstantInt::get(GEP->getContext(), IndexQuot);
+ return ConstantInt::get(Ctx, IndexQuot);
IRBuilder<> Builder(GEP);
const auto &VarOffset = VarOffsets.front();
APInt OffsetQuot;
- APInt::sdivrem(VarOffset.second,
- APInt(VarOffset.second.getBitWidth(), VecElemSize), OffsetQuot,
- Rem);
- if (!Rem.isZero() || OffsetQuot.isZero())
+ APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
+ if (Rem != 0 || OffsetQuot.isZero())
return nullptr;
Value *Offset = VarOffset.first;
@@ -468,7 +466,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
if (!OffsetQuot.isOne()) {
ConstantInt *ConstMul =
- ConstantInt::get(OffsetType, OffsetQuot.getSExtValue());
+ ConstantInt::get(Ctx, OffsetQuot.sext(OffsetType->getBitWidth()));
Offset = Builder.CreateMul(Offset, ConstMul);
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
NewInsts.push_back(NewInst);
@@ -477,7 +475,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
return Offset;
ConstantInt *ConstIndex =
- ConstantInt::get(OffsetType, IndexQuot.getSExtValue());
+ ConstantInt::get(Ctx, IndexQuot.sext(OffsetType->getBitWidth()));
Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
NewInsts.push_back(NewInst);
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 5297816..edc4858 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -709,7 +709,7 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
// Verify the register is compatible with the operand.
if (const TargetRegisterClass *OpRC =
- TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI, *MF)) {
+ TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI)) {
const TargetRegisterClass *OldRC = MRI->getRegClass(Old.getReg());
const TargetRegisterClass *NewRC = MRI->getRegClass(New->getReg());
unsigned NewSubReg = New->getSubReg();
@@ -2409,8 +2409,7 @@ bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
- const TargetRegisterClass *OpRC =
- TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
+ const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx, TRI);
if (!OpRC || !TRI->isVectorSuperClass(OpRC))
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4927d2b..3332723 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2443,6 +2443,53 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG,
return DAG.getMergeValues({ConvertedVal, ArgValue.getValue(1)}, SL);
}
+SDValue SITargetLowering::lowerWorkGroupId(
+ SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+ AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+ if (!Subtarget->hasClusters())
+ return getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+
+ // Clusters are supported. Return the global position in the grid. If clusters
+ // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID.
+
+ // WorkGroupIdXYZ = ClusterId == 0 ?
+ // ClusterIdXYZ :
+ // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+ SDValue ClusterIdXYZ = getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+ SDLoc SL(ClusterIdXYZ);
+ SDValue ClusterMaxIdXYZ = getPreloadedValue(DAG, MFI, VT, ClusterMaxIdPV);
+ SDValue One = DAG.getConstant(1, SL, VT);
+ SDValue ClusterSizeXYZ = DAG.getNode(ISD::ADD, SL, VT, ClusterMaxIdXYZ, One);
+ SDValue ClusterWorkGroupIdXYZ =
+ getPreloadedValue(DAG, MFI, VT, ClusterWorkGroupIdPV);
+ SDValue GlobalIdXYZ =
+ DAG.getNode(ISD::ADD, SL, VT, ClusterWorkGroupIdXYZ,
+ DAG.getNode(ISD::MUL, SL, VT, ClusterIdXYZ, ClusterSizeXYZ));
+
+ switch (MFI.getClusterDims().getKind()) {
+ case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+ case AMDGPU::ClusterDimsAttr::Kind::VariableDims:
+ return GlobalIdXYZ;
+ case AMDGPU::ClusterDimsAttr::Kind::NoCluster:
+ return ClusterIdXYZ;
+ case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+ using namespace AMDGPU::Hwreg;
+ SDValue ClusterIdField =
+ DAG.getTargetConstant(HwregEncoding::encode(ID_IB_STS2, 6, 4), SL, VT);
+ SDNode *GetReg =
+ DAG.getMachineNode(AMDGPU::S_GETREG_B32_const, SL, VT, ClusterIdField);
+ SDValue ClusterId(GetReg, 0);
+ SDValue Zero = DAG.getConstant(0, SL, VT);
+ return DAG.getNode(ISD::SELECT_CC, SL, VT, ClusterId, Zero, ClusterIdXYZ,
+ GlobalIdXYZ, DAG.getCondCode(ISD::SETEQ));
+ }
+ }
+
+ llvm_unreachable("nothing should reach here");
+}
+
SDValue SITargetLowering::getPreloadedValue(
SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
@@ -2461,9 +2508,30 @@ SDValue SITargetLowering::getPreloadedValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ const ArgDescriptor ClusterWorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+ const ArgDescriptor ClusterWorkGroupIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+ const ArgDescriptor ClusterWorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+ const ArgDescriptor ClusterWorkGroupMaxIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+ const ArgDescriptor ClusterWorkGroupMaxFlatID =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+ auto LoadConstant = [&](unsigned N) {
+ return DAG.getConstant(N, SDLoc(), VT);
+ };
+
if (Subtarget->hasArchitectedSGPRs() &&
- (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx ||
- CC == CallingConv::AMDGPU_Gfx_WholeWave)) {
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+ AMDGPU::ClusterDimsAttr ClusterDims = MFI.getClusterDims();
+ bool HasFixedDims = ClusterDims.isFixedDims();
+
switch (PVID) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Reg = &WorkGroupIDX;
@@ -2480,6 +2548,53 @@ SDValue SITargetLowering::getPreloadedValue(
RC = &AMDGPU::SReg_32RegClass;
Ty = LLT::scalar(32);
break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[0] - 1);
+ Reg = &ClusterWorkGroupMaxIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[1] - 1);
+ Reg = &ClusterWorkGroupMaxIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[2] - 1);
+ Reg = &ClusterWorkGroupMaxIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ Reg = &ClusterWorkGroupMaxFlatID;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
default:
break;
}
@@ -9539,6 +9654,19 @@ SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
}
+SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+ AMDGPU::Hwreg::Id HwReg,
+ unsigned LowBit,
+ unsigned Width) const {
+ SDLoc SL(Op);
+ using namespace AMDGPU::Hwreg;
+ return {DAG.getMachineNode(
+ AMDGPU::S_GETREG_B32_const, SL, MVT::i32,
+ DAG.getTargetConstant(HwregEncoding::encode(HwReg, LowBit, Width),
+ SL, MVT::i32)),
+ 0};
+}
+
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
@@ -9685,14 +9813,81 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_id_x:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_id_y:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_id_z:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_x:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_y:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_z:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+ return Subtarget->hasClusters()
+ ? lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4)
+ : SDValue();
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z)
+ : DAG.getPOISON(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+ return Subtarget->hasClusters()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID)
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_wave_id:
return lowerWaveID(DAG, Op);
case Intrinsic::amdgcn_lds_kernel_id: {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 9c26cfa4..ba408a8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -64,6 +65,11 @@ private:
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
const ISD::InputArg &Arg) const;
+ SDValue lowerWorkGroupId(
+ SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
SDValue getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
@@ -84,6 +90,9 @@ private:
unsigned NewOpcode) const;
SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
+ SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+ AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+ unsigned Width) const;
SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
const ArgDescriptor &ArgDesc) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b163a27..ae75fb5 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1941,13 +1941,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// LOAD_CNT is only relevant to vgpr or LDS.
unsigned RegNo = FIRST_LDS_VGPR;
- // Only objects with alias scope info were added to LDSDMAScopes array.
- // In the absense of the scope info we will not be able to disambiguate
- // aliasing here. There is no need to try searching for a corresponding
- // store slot. This is conservatively correct because in that case we
- // will produce a wait using the first (general) LDS DMA wait slot which
- // will wait on all of them anyway.
- if (Ptr && Memop->getAAInfo() && Memop->getAAInfo().Scope) {
+ if (Ptr && Memop->getAAInfo()) {
const auto &LDSDMAStores = ScoreBrackets.getLDSDMAStores();
for (unsigned I = 0, E = LDSDMAStores.size(); I != E; ++I) {
if (MI.mayAlias(AA, *LDSDMAStores[I], true))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 23a124f..0361868 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2598,7 +2598,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
const MCInstrDesc &TID = get(NewOpcode);
const TargetRegisterClass *NewRC =
- RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF));
+ RI.getAllocatableClass(getRegClass(TID, 0, &RI));
MRI.setRegClass(DestReg, NewRC);
UseMO->setReg(DestReg);
@@ -3615,7 +3615,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
const MCInstrDesc &MovDesc = get(MovOp);
- const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI, *MF);
+ const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI);
if (Is16Bit) {
// We just need to find a correctly sized register class, so the
// subregister index compatibility doesn't matter since we're statically
@@ -5976,9 +5976,7 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const {
static const TargetRegisterClass *
adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
const MCInstrDesc &TID, unsigned RCID) {
- if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) &&
- !(TID.TSFlags & SIInstrFlags::Spill)) ||
- (TID.TSFlags & SIInstrFlags::MIMG))) {
+ if (!ST.hasGFX90AInsts() && (TID.mayLoad() || TID.mayStore())) {
switch (RCID) {
case AMDGPU::AV_32RegClassID:
RCID = AMDGPU::VGPR_32RegClassID;
@@ -6006,17 +6004,15 @@ adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
return RI.getProperlyAlignedRC(RI.getRegClass(RCID));
}
-const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
- unsigned OpNum, const TargetRegisterInfo *TRI,
- const MachineFunction &MF)
- const {
+const TargetRegisterClass *
+SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
if (OpNum >= TID.getNumOperands())
return nullptr;
auto RegClass = TID.operands()[OpNum].RegClass;
- if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
- // Special pseudos have no alignment requirement
+ // Special pseudos have no alignment requirement.
+ if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO || isSpill(TID))
return RI.getRegClass(RegClass);
- }
return adjustAllocatableRegClass(ST, RI, TID, RegClass);
}
@@ -6755,8 +6751,8 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
if (moveFlatAddrToVGPR(MI))
return;
- const TargetRegisterClass *DeclaredRC = getRegClass(
- MI.getDesc(), SAddr->getOperandNo(), &RI, *MI.getParent()->getParent());
+ const TargetRegisterClass *DeclaredRC =
+ getRegClass(MI.getDesc(), SAddr->getOperandNo(), &RI);
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC);
SAddr->setReg(ToSGPR);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f7dde2b..dffb3d7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -797,10 +797,12 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::Spill;
}
- static bool isSpill(const MachineInstr &MI) {
- return MI.getDesc().TSFlags & SIInstrFlags::Spill;
+ static bool isSpill(const MCInstrDesc &Desc) {
+ return Desc.TSFlags & SIInstrFlags::Spill;
}
+ static bool isSpill(const MachineInstr &MI) { return isSpill(MI.getDesc()); }
+
static bool isWWMRegSpillOpcode(uint16_t Opcode) {
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
@@ -926,7 +928,8 @@ public:
return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
- Opcode == AMDGPU::S_GETREG_B32;
+ Opcode == AMDGPU::S_GETREG_B32 ||
+ Opcode == AMDGPU::S_GETREG_B32_const;
}
/// \returns true if this is an s_store_dword* instruction. This is more
@@ -1534,10 +1537,9 @@ public:
/// Return true if this opcode should not be used by codegen.
bool isAsmOnlyOpcode(int MCOp) const;
- const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF)
- const override;
+ const TargetRegisterClass *
+ getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const override;
void fixImplicitOperands(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 69d02e7..f0d1117 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1337,10 +1337,10 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
AMDGPU::OpName::data1);
const TargetRegisterClass *DataRC0 =
- TII->getRegClass(Write2Opc, Data0Idx, TRI, *MF);
+ TII->getRegClass(Write2Opc, Data0Idx, TRI);
const TargetRegisterClass *DataRC1 =
- TII->getRegClass(Write2Opc, Data1Idx, TRI, *MF);
+ TII->getRegClass(Write2Opc, Data1Idx, TRI);
if (unsigned SubReg = Data0->getSubReg()) {
DataRC0 = TRI->getMatchingSuperRegClass(MRI->getRegClass(Data0->getReg()),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 54426d3..1f11be4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -195,6 +195,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
VGPRForAGPRCopy =
AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
}
+
+ ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
}
MachineFunctionInfo *SIMachineFunctionInfo::clone(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ca8f803..4560615 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// Default/requested number of work groups for the function.
SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};
+ // Requested cluster dimensions.
+ AMDGPU::ClusterDimsAttr ClusterDims;
+
private:
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
@@ -1207,6 +1210,8 @@ public:
unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
+
+ AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 1637c06..c501ebb 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -462,10 +462,6 @@ public:
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
- bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace) const override;
-
bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
@@ -1375,41 +1371,6 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
return Changed;
}
-bool SIGfx90ACacheControl::enableStoreCacheBypass(
- const MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace) const {
- assert(!MI->mayLoad() && MI->mayStore());
- bool Changed = false;
-
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- switch (Scope) {
- case SIAtomicScope::SYSTEM:
- case SIAtomicScope::AGENT:
- /// Do not set glc for store atomic operations as they implicitly write
- /// through the L1 cache.
- break;
- case SIAtomicScope::WORKGROUP:
- case SIAtomicScope::WAVEFRONT:
- case SIAtomicScope::SINGLETHREAD:
- // No cache to bypass. Store atomics implicitly write through the L1
- // cache.
- break;
- default:
- llvm_unreachable("Unsupported synchronization scope");
- }
- }
-
- /// The scratch address space does not need the global memory caches
- /// to be bypassed as all memory operations by the same thread are
- /// sequentially consistent, and no other thread can access scratch
- /// memory.
-
- /// Other address spaces do not have a cache.
-
- return Changed;
-}
-
bool SIGfx90ACacheControl::enableRMWCacheBypass(
const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2248838..205237f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1108,8 +1108,8 @@ bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
SIInstrFlags::FlatScratch);
}
-const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
- const MachineFunction &MF, unsigned Kind) const {
+const TargetRegisterClass *
+SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
// This is inaccurate. It depends on the instruction and address space. The
// only place where we should hit this is for dealing with frame indexes /
// private accesses, so this is correct in that case.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index eeefef1..7b91ba7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -154,8 +154,8 @@ public:
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
- const TargetRegisterClass *getPointerRegClass(
- const MachineFunction &MF, unsigned Kind = 0) const override;
+ const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const override;
/// Returns a legal register class to copy a register in the specified class
/// to or from. If it is possible to copy the register directly without using
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index fe94887..296ce5a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1127,19 +1127,26 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
-// This is hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16-bits.
-// FIXME: Should have separate pseudos for known may read MODE and
-// only read MODE.
-def S_GETREG_B32 : SOPK_Pseudo <
+class S_GETREG_B32_Pseudo<list<dag> pattern=[]> : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
- "$sdst, $simm16",
- [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
+ "$sdst, $simm16", pattern>;
+
+// This is hasSideEffects to allow its use in readcyclecounter selection.
+// FIXME: Should have separate pseudos for known may read MODE and
+// only read MODE.
+def S_GETREG_B32 : S_GETREG_B32_Pseudo<
+ [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
let hasSideEffects = 1;
let Uses = [MODE];
}
+// A version of the pseudo for reading hardware register fields that are
+// known to remain the same during the course of the run. Has no side
+// effects and doesn't read MODE.
+def S_GETREG_B32_const : S_GETREG_B32_Pseudo;
+
let Defs = [MODE], Uses = [MODE] in {
// FIXME: Need to truncate immediate to 16-bits.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 40da4f9..faae1fe 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3533,6 +3533,54 @@ bool isPackedFP32Inst(unsigned Opc) {
}
}
+const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
+ assert(isFixedDims() && "expect kind to be FixedDims");
+ return Dims;
+}
+
+std::string ClusterDimsAttr::to_string() const {
+ SmallString<10> Buffer;
+ raw_svector_ostream OS(Buffer);
+
+ switch (getKind()) {
+ case Kind::Unknown:
+ return "";
+ case Kind::NoCluster: {
+ OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
+ return Buffer.c_str();
+ }
+ case Kind::VariableDims: {
+ OS << EncoVariableDims << ',' << EncoVariableDims << ','
+ << EncoVariableDims;
+ return Buffer.c_str();
+ }
+ case Kind::FixedDims: {
+ OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
+ return Buffer.c_str();
+ }
+ }
+ llvm_unreachable("Unknown ClusterDimsAttr kind");
+}
+
+ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
+ std::optional<SmallVector<unsigned>> Attr =
+ getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
+ ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;
+
+ if (!Attr.has_value())
+ AttrKind = Kind::Unknown;
+ else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; }))
+ AttrKind = Kind::NoCluster;
+ else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; }))
+ AttrKind = Kind::VariableDims;
+
+ ClusterDimsAttr A(AttrKind);
+ if (AttrKind == Kind::FixedDims)
+ A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
+
+ return A;
+}
+
} // namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3fcd16f..3f8d43d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1813,6 +1813,50 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
+class ClusterDimsAttr {
+public:
+ enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
+
+ ClusterDimsAttr() = default;
+
+ Kind getKind() const { return AttrKind; }
+
+ bool isUnknown() const { return getKind() == Kind::Unknown; }
+
+ bool isNoCluster() const { return getKind() == Kind::NoCluster; }
+
+ bool isFixedDims() const { return getKind() == Kind::FixedDims; }
+
+  bool isVariableDims() const { return getKind() == Kind::VariableDims; }
+
+ void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }
+
+ void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }
+
+ void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }
+
+  /// \returns the dims stored. Note that this function can only be called if
+  /// the kind is \p FixedDims.
+ const std::array<unsigned, 3> &getDims() const;
+
+ bool operator==(const ClusterDimsAttr &RHS) const {
+ return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
+ }
+
+ std::string to_string() const;
+
+ static ClusterDimsAttr get(const Function &F);
+
+private:
+ enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
+
+ ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
+
+ std::array<unsigned, 3> Dims = {0, 0, 0};
+
+ Kind AttrKind = Kind::Unknown;
+};
+
} // end namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index bc20daf..e94220a 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -310,8 +310,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
const TargetRegisterClass *
-ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return &ARM::GPRRegClass;
}
@@ -708,7 +707,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
- MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
@@ -882,7 +881,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const MCInstrDesc &MCID = MI.getDesc();
const TargetRegisterClass *RegClass =
- TII.getRegClass(MCID, FIOperandNum, this, *MI.getParent()->getParent());
+ TII.getRegClass(MCID, FIOperandNum, this);
if (Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(FrameReg)))
// Must be addrmode4/6.
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 69e10ac..5b67b34 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -91,8 +91,7 @@ public:
MCRegister PhysReg) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index a8da70e..138981a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2364,7 +2364,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
break;
const MCInstrDesc &MCID = MI.getDesc();
- const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
+ const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI);
if (RegClass && !RegClass->contains(ARM::SP))
HasNonSPFrameIndex = true;
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index eea0cb6..cd4299b 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2424,7 +2424,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(
Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
MRI->constrainRegClass(FirstReg, TRC);
MRI->constrainRegClass(SecondReg, TRC);
@@ -3014,7 +3014,7 @@ static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
MachineFunction *MF = MI->getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const MCInstrDesc &MCID = TII->get(MI->getOpcode());
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI);
MRI.constrainRegClass(NewBaseReg, TRC);
int OldOffset = MI->getOperand(BaseOp + 1).getImm();
@@ -3071,10 +3071,10 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
const MCInstrDesc &MCID = TII->get(NewOpcode);
// Constrain the def register class
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
MRI.constrainRegClass(NewReg, TRC);
// And do the same for the base operand
- TRC = TII->getRegClass(MCID, 2, TRI, *MF);
+ TRC = TII->getRegClass(MCID, 2, TRI);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
diff --git a/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 00d8d846..8e1bf1d 100644
--- a/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -283,9 +283,7 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID1 = TII->get(MulOpc);
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
- const MachineFunction &MF = *MI->getParent()->getParent();
- Register TmpReg =
- MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI, MF));
+ Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index ce4ee15..4b8c2fd 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI) {}
+ : ARMBaseInstrInfo(STI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb1InstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index e91441b..431ce38 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI) {}
+ : ARMBaseInstrInfo(STI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb2InstrInfo::getNop() const {
@@ -564,8 +564,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool isSub = false;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetRegisterClass *RegClass =
- TII.getRegClass(Desc, FrameRegIdx, TRI, MF);
+ const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx, TRI);
// Memory operands in inline assembly always use AddrModeT2_i12.
if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 9115026..12875c2 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -35,12 +35,13 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-ThumbRegisterInfo::ThumbRegisterInfo() = default;
+ThumbRegisterInfo::ThumbRegisterInfo(const ARMSubtarget &STI)
+ : IsThumb1Only(STI.isThumb1Only()) {}
const TargetRegisterClass *
ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const {
- if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ if (!IsThumb1Only)
return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
if (ARM::tGPRRegClass.hasSubClassEq(RC))
@@ -49,10 +50,9 @@ ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
const TargetRegisterClass *
-ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
- if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
- return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
+ThumbRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ if (!IsThumb1Only)
+ return ARMBaseRegisterInfo::getPointerRegClass(Kind);
return &ARM::tGPRRegClass;
}
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index ccfe211..1512a09 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -23,16 +23,18 @@ namespace llvm {
class ARMBaseInstrInfo;
struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
+private:
+ const bool IsThumb1Only;
+
public:
- ThumbRegisterInfo();
+ explicit ThumbRegisterInfo(const ARMSubtarget &STI);
const TargetRegisterClass *
getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 051affe..18bea84 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -289,8 +289,7 @@ Register AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-AVRRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
+AVRRegisterInfo::getPointerRegClass(unsigned Kind) const {
// FIXME: Currently we're using avr-gcc as reference, so we restrict
// ptrs to Y and Z regs. Though avr-gcc has buggy implementation
// of memory constraint, so we can fix it and bit avr-gcc here ;-)
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 8eb0cf3..e69696b4 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -44,8 +44,7 @@ public:
Register getFrameRegister(const MachineFunction &MF) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
/// Splits a 16-bit `DREGS` register into the lo/hi register pair.
/// \param Reg A 16-bit register to split.
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 3b7bd1c..52e6b0b 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1751,10 +1751,11 @@ namespace {
class BitSimplification : public Transformation {
public:
BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
- const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
- MachineRegisterInfo &mri, MachineFunction &mf)
- : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
- MF(mf), BT(bt) {}
+ const HexagonInstrInfo &hii,
+ const HexagonRegisterInfo &hri, MachineRegisterInfo &mri,
+ MachineFunction &mf)
+ : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), BT(bt) {
+ }
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
@@ -1797,7 +1798,6 @@ namespace {
const HexagonInstrInfo &HII;
const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
- MachineFunction &MF;
BitTracker &BT;
};
@@ -1886,7 +1886,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,
bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
unsigned OpNum) {
- auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
+ auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI);
auto *RRC = HBS::getFinalVRegClass(R, MRI);
return OpRC->hasSubClassEq(RRC);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index b3c61e1..dd343d9 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -2225,7 +2225,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
if (!Bad) {
// If the addressing mode is ok, check the register class.
unsigned OpNum = Load ? 0 : 2;
- auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
+ auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI);
RC = getCommonRC(SI.RC, RC);
if (RC == nullptr)
Bad = true;
@@ -2395,7 +2395,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
SrcOp.getSubReg() };
- auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
+ auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI);
// The this-> is needed to unconfuse MSVC.
Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
index b8060fb..7cbd81f 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
@@ -646,7 +646,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
MachineInstr *CombI;
if (Acc != 0) {
const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
- const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
Register VReg = MF->getRegInfo().createVirtualRegister(RC);
MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
NG.push_back(TfrI);
@@ -677,7 +677,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
} else {
// Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
- const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
Register VReg = MF->getRegInfo().createVirtualRegister(RC);
MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
NG.push_back(TfrI);
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2731c52..77ce983 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -444,7 +444,6 @@ bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
}
const TargetRegisterClass *
-HexagonRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
+HexagonRegisterInfo::getPointerRegClass(unsigned Kind) const {
return &Hexagon::IntRegsRegClass;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 7215398..945b860 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -72,8 +72,7 @@ public:
const TargetRegisterClass *RC) const;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
bool isEHReturnCalleeSaveReg(Register Reg) const;
};
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index bc486cd..cb88d1a 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -653,7 +653,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
const MCInstrDesc& MCID = PacketMI.getDesc();
// First operand is always the result.
- const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
+ const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI);
// Double regs can not feed into new value store: PRM section: 5.4.2.2.
if (PacketRC == &Hexagon::DoubleRegsRegClass)
return false;
@@ -866,7 +866,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
return false;
const MCInstrDesc& MCID = PI.getDesc();
- const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+ const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI);
if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
return false;
diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
index 069b181..0ccebeb3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
@@ -86,7 +86,7 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
continue;
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
- const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
if (!(RC && RC->contains(LoongArch::R0))) {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
index d1e4025..53381c2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
@@ -33,8 +33,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override {
+ getPointerRegClass(unsigned Kind = 0) const override {
return &LoongArch::GPRRegClass;
}
diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 44596a1..c1a1e8e 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -91,8 +91,7 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
return &MSP430::GR16RegClass;
}
diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index 51e07f4..fbca973 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -28,9 +28,8 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- const TargetRegisterClass*
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const override;
bool eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index cafc11b..5d08f56 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, Mips::Bimm16) {}
+ : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index d257f02..6609959 100644
--- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -28,7 +28,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo() = default;
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &STI)
+ : MipsRegisterInfo(STI) {}
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/llvm/lib/Target/Mips/Mips16RegisterInfo.h
index ff115b3..29d08b4 100644
--- a/llvm/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.h
@@ -16,10 +16,9 @@
#include "MipsRegisterInfo.h"
namespace llvm {
-
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
- Mips16RegisterInfo();
+ explicit Mips16RegisterInfo(const MipsSubtarget &STI);
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 539288e..4d105bd 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -37,27 +37,26 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
-MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) {
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &STI)
+ : MipsGenRegisterInfo(Mips::RA), ArePtrs64bit(STI.getABI().ArePtrs64bit()) {
MIPS_MC::initLLVMToCVRegMapping(this);
}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
const TargetRegisterClass *
-MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
- MipsABIInfo ABI = MF.getSubtarget<MipsSubtarget>().getABI();
+MipsRegisterInfo::getPointerRegClass(unsigned Kind) const {
MipsPtrClass PtrClassKind = static_cast<MipsPtrClass>(Kind);
switch (PtrClassKind) {
case MipsPtrClass::Default:
- return ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ return ArePtrs64bit ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
case MipsPtrClass::GPR16MM:
return &Mips::GPRMM16RegClass;
case MipsPtrClass::StackPointer:
- return ABI.ArePtrs64bit() ? &Mips::SP64RegClass : &Mips::SP32RegClass;
+ return ArePtrs64bit ? &Mips::SP64RegClass : &Mips::SP32RegClass;
case MipsPtrClass::GlobalPointer:
- return ABI.ArePtrs64bit() ? &Mips::GP64RegClass : &Mips::GP32RegClass;
+ return ArePtrs64bit ? &Mips::GP64RegClass : &Mips::GP32RegClass;
}
llvm_unreachable("Unknown pointer kind");
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.h b/llvm/lib/Target/Mips/MipsRegisterInfo.h
index b002f4c..dbdb050 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -25,6 +25,9 @@ namespace llvm {
class TargetRegisterClass;
class MipsRegisterInfo : public MipsGenRegisterInfo {
+private:
+ const bool ArePtrs64bit;
+
public:
enum class MipsPtrClass {
/// The default register class for integer values.
@@ -38,14 +41,13 @@ public:
GlobalPointer = 3,
};
- MipsRegisterInfo();
+ explicit MipsRegisterInfo(const MipsSubtarget &STI);
/// Get PIC indirect call register
static unsigned getPICCallReg();
/// Code Generation virtual methods...
- const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const override;
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index caa20f7..dbdbb17 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -28,7 +28,7 @@ static unsigned getUnconditionalBranch(const MipsSubtarget &STI) {
}
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {}
+ : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI(STI) {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
@@ -682,8 +682,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc,
const MCInstrDesc &Desc = get(Opc);
assert(Desc.NumOperands == 2 && "Unary instruction expected.");
const MipsRegisterInfo *RI = &getRegisterInfo();
- unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF));
- unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF));
+ unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI));
+ unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI));
return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
}
diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index feb2b3d..1326878 100644
--- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -34,7 +34,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo() = default;
+MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &STI)
+ : MipsRegisterInfo(STI) {}
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/llvm/lib/Target/Mips/MipsSERegisterInfo.h
index cc8496e..93de2c7 100644
--- a/llvm/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
class MipsSERegisterInfo : public MipsRegisterInfo {
public:
- MipsSERegisterInfo();
+ explicit MipsSERegisterInfo(const MipsSubtarget &STI);
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 415164f..89165fa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -401,7 +401,7 @@ namespace {
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1);
SDLoc dl(Op);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
SDValue NewOp =
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index f123040..85b4072 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -164,8 +164,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
-PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
// Note that PPCInstrInfo::foldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
@@ -2022,9 +2021,9 @@ Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const TargetRegisterClass *RC = getPointerRegClass(MF);
+ const TargetRegisterClass *RC = getPointerRegClass();
Register BaseReg = MRI.createVirtualRegister(RC);
- MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
@@ -2052,8 +2051,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.constrainRegClass(BaseReg,
- TII.getRegClass(MCID, FIOperandNum, this, MF));
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, FIOperandNum, this));
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 849f856b..5606902 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -79,7 +79,7 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index cd83928..2b5f18d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -946,6 +946,11 @@ public:
return isUImmPred([](int64_t Imm) { return 4 == Imm; });
}
+ bool isImm5Zibi() const {
+ return isUImmPred(
+ [](int64_t Imm) { return (Imm != 0 && isUInt<5>(Imm)) || Imm == -1; });
+ }
+
bool isSImm5Plus1() const {
return isSImmPred(
[](int64_t Imm) { return Imm != INT64_MIN && isInt<5>(Imm - 1); });
@@ -1643,6 +1648,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"operand must be a valid system register "
"name or an integer in the range");
}
+ case Match_InvalidImm5Zibi:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -1, (1 << 5) - 1,
+ "immediate must be non-zero in the range");
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return generateVTypeError(ErrorLoc);
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 61b86ab..fb5a35d 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -491,6 +491,14 @@ static DecodeStatus decodeUImmPlus1Operand(MCInst &Inst, uint32_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeImmZibiOperand(MCInst &Inst, uint32_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<5>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(Imm ? Imm : -1LL));
+ return MCDisassembler::Success;
+}
+
template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index fcea23a..70b7c43 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -341,6 +341,7 @@ enum OperandType : unsigned {
OPERAND_UIMM64,
OPERAND_THREE,
OPERAND_FOUR,
+ OPERAND_IMM5_ZIBI,
OPERAND_SIMM5,
OPERAND_SIMM5_NONZERO,
OPERAND_SIMM5_PLUS1,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 717fba6..6d587e6 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -97,6 +97,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getImmOpValueZibi(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
uint64_t getImmOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -559,6 +563,19 @@ RISCVMCCodeEmitter::getImmOpValueAsrN(const MCInst &MI, unsigned OpNo,
return getImmOpValue(MI, OpNo, Fixups, STI);
}
+uint64_t
+RISCVMCCodeEmitter::getImmOpValueZibi(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm() && "Zibi operand must be an immediate");
+ int64_t Res = MO.getImm();
+ if (Res == -1)
+ return 0;
+
+ return Res;
+}
+
uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 1c7aa73..51180f5 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -89,7 +89,7 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
Register X0Reg;
- const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
} else if (RC && RC->contains(RISCV::X0_W)) {
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 063963d..95703e3 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -78,6 +78,12 @@ def FeatureStdExtE
: RISCVExtension<2, 0, "Embedded Instruction Set with 16 GPRs">,
RISCVExtensionBitmask<0, 4>;
+def FeatureStdExtZibi
+ : RISCVExperimentalExtension<0, 1, "Branch with Immediate">;
+def HasStdExtZibi : Predicate<"Subtarget->hasStdExtZibi()">,
+ AssemblerPredicate<(all_of FeatureStdExtZibi),
+ "'Zibi' (Branch with Immediate)">;
+
def FeatureStdExtZic64b
: RISCVExtension<1, 0, "Cache Block Size Is 64 Bytes">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f13775..f9b484b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18839,6 +18839,8 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
case ISD::ADD:
case ISD::OR:
case ISD::XOR:
+ case ISD::UMIN:
+ case ISD::UMAX:
break;
}
@@ -18948,7 +18950,7 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
// BEXTI, where C is power of 2.
- if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
+ if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
(Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
@@ -23258,6 +23260,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.isRegLoc()) {
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.EmitCallSiteInfo)
+ CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
assert(!IsTailCall && "Tail call not allowed if stack is used "
@@ -23359,9 +23365,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CLI.CFIType)
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
- if (MF.getTarget().Options.EmitCallGraphSection && CB &&
- CB->isIndirectCall())
- DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
@@ -23370,10 +23374,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CLI.CFIType)
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
- if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
- DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
-
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -24842,8 +24844,7 @@ bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
return true;
- // FIXME: Should use hasCPOPLike here.
- return Subtarget.hasStdExtZbb() &&
+ return Subtarget.hasCPOPLike() &&
(VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}
@@ -24937,8 +24938,8 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
EVT VT, const APInt &AndMask) const {
- if (Subtarget.hasCZEROLike())
- return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
+ if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
+ return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index c2667b0..2afd77a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -498,6 +498,22 @@ class RVInstB<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
let Inst{6-0} = opcode.Value;
}
+class RVInstBIMM<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatB> {
+ bits<12> imm12;
+ bits<5> cimm;
+ bits<5> rs1;
+ let Inst{31} = imm12{11};
+ let Inst{30-25} = imm12{9-4};
+ let Inst{24-20} = cimm;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-8} = imm12{3-0};
+ let Inst{7} = imm12{10};
+ let Inst{6-0} = opcode.Value;
+}
+
class RVInstU<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatU> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 92552b3..7cdfb1e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2329,6 +2329,7 @@ include "RISCVInstrInfoZimop.td"
include "RISCVInstrInfoZicbo.td"
include "RISCVInstrInfoZicond.td"
include "RISCVInstrInfoZilsd.td"
+include "RISCVInstrInfoZibi.td"
// Scalar FP
include "RISCVInstrInfoF.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td
new file mode 100644
index 0000000..1570355
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td
@@ -0,0 +1,44 @@
+//===-- RISCVInstrInfoZibi.td - 'Zibi' instructions --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file describes the RISC-V instructions for 'Zibi' (branch with imm).
+///
+//===----------------------------------------------------------------------===//
+
+// A 5-bit unsigned immediate representing 1-31 and -1. 00000 represents -1.
+def imm5_zibi : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{
+ return (Imm != 0 && isUInt<5>(Imm)) || Imm == -1;
+}]> {
+ let ParserMatchClass = ImmAsmOperand<"", 5, "Zibi">;
+ let EncoderMethod = "getImmOpValueZibi";
+ let DecoderMethod = "decodeImmZibiOperand";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return (Imm >= 1 && Imm <= 31) || Imm == -1;
+ }];
+ let OperandType = "OPERAND_IMM5_ZIBI";
+}
+
+class Branch_imm<bits<3> funct3, string opcodestr>
+ : RVInstBIMM<funct3, OPC_BRANCH, (outs),
+ (ins GPR:$rs1, imm5_zibi:$cimm, bare_simm13_lsb0:$imm12),
+ opcodestr, "$rs1, $cimm, $imm12">,
+ Sched<[WriteJmp, ReadJmp]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+
+let Predicates = [HasStdExtZibi] in {
+ def BEQI : Branch_imm<0b010, "beqi">;
+ def BNEI : Branch_imm<0b011, "bnei">;
+} // Predicates = [HasStdExtZibi]
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 2810139..67726db 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -123,8 +123,7 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
}
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override {
+ getPointerRegClass(unsigned Kind = 0) const override {
return &RISCV::GPRRegClass;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index a1ec24f..f81b1e12 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -163,6 +163,9 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
setMachineOutliner(true);
setSupportsDefaultOutlining(true);
+ // RISC-V supports the debug entry values.
+ setSupportsDebugEntryValues(true);
+
if (TT.isOSFuchsia() && !TT.isArch64Bit())
report_fatal_error("Fuchsia is only supported for 64-bit");
@@ -395,6 +398,7 @@ public:
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addFastRegAlloc() override;
+ bool addILPOpts() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -580,9 +584,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
- if (EnableMachineCombiner)
- addPass(&MachineCombinerID);
-
if (TM->getTargetTriple().isRISCV64()) {
addPass(createRISCVOptWInstrsPass());
}
@@ -617,6 +618,13 @@ void RISCVPassConfig::addPostRegAlloc() {
addPass(createRISCVRedundantCopyEliminationPass());
}
+bool RISCVPassConfig::addILPOpts() {
+ if (EnableMachineCombiner)
+ addPass(&MachineCombinerID);
+
+ return true;
+}
+
void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
OptimizationLevel Level) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1ca5132..a06faa4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -289,9 +289,7 @@ bool RISCVTTIImpl::hasActiveVectorLength() const {
TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())
- ? TTI::PSK_FastHardware
- : TTI::PSK_Software;
+ return ST->hasCPOPLike() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}
InstructionCost RISCVTTIImpl::getPartialReductionCost(
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6bd7d51..47e0a25 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -141,6 +141,8 @@ public:
return false;
}
+ bool shouldConsiderVectorizationRegPressure() const override { return true; }
+
InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6265118..ffba284 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -455,8 +455,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
True->getOperand(1).setReg(MI.getOperand(2).getReg());
// If True is masked then its passthru needs to be in VRNoV0.
MRI->constrainRegClass(True->getOperand(1).getReg(),
- TII->getRegClass(True->getDesc(), 1, TRI,
- *True->getParent()->getParent()));
+ TII->getRegClass(True->getDesc(), 1, TRI));
}
MI.setDesc(TII->get(NewOpc));
@@ -674,10 +673,9 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
if (Passthru.getReg() != RISCV::NoRegister)
- MRI->constrainRegClass(Passthru.getReg(),
- TII->getRegClass(Src->getDesc(),
- SrcPassthru.getOperandNo(), TRI,
- *Src->getParent()->getParent()));
+ MRI->constrainRegClass(
+ Passthru.getReg(),
+ TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
}
if (RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) {
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index cd0f649..e28f445 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -38,7 +38,7 @@ static cl::opt<unsigned>
void SparcInstrInfo::anchor() {}
SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST)
- : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(),
+ : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST),
Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index e4db27a..0a14746 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -31,7 +31,8 @@ static cl::opt<bool>
ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
cl::desc("Reserve application registers (%g2-%g4)"));
-SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {}
+SparcRegisterInfo::SparcRegisterInfo(const SparcSubtarget &STI)
+ : SparcGenRegisterInfo(SP::O7), Is64Bit(STI.is64Bit()) {}
const MCPhysReg*
SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -111,11 +112,10 @@ bool SparcRegisterInfo::isReservedReg(const MachineFunction &MF,
return getReservedRegs(MF)[Reg];
}
-const TargetRegisterClass*
-SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
- const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
- return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+const TargetRegisterClass *
+SparcRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ assert(Kind == 0 && "this should only be used for default cases");
+ return Is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index eae859c..abd8bae 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -19,8 +19,14 @@
#include "SparcGenRegisterInfo.inc"
namespace llvm {
+class SparcSubtarget;
+
struct SparcRegisterInfo : public SparcGenRegisterInfo {
- SparcRegisterInfo();
+private:
+ const bool Is64Bit;
+
+public:
+ explicit SparcRegisterInfo(const SparcSubtarget &STI);
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -32,8 +38,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const;
- const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const override;
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override;
bool eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index 34888f4..34d58e0 100644
--- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -115,12 +115,11 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
}
bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
- const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
const MCInstrDesc &MID = MI->getDesc();
unsigned Count = 0;
for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
- const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI);
if (RC == nullptr)
continue;
if (OpIdx >= MID.getNumDefs() &&
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 6f146b6..a05fdc7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1851,7 +1851,7 @@ bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand(
if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
const TargetRegisterClass *TRC =
- Subtarget->getRegisterInfo()->getPointerRegClass(*MF);
+ Subtarget->getRegisterInfo()->getPointerRegClass();
SDLoc DL(Base);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32);
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 460be43..b1de145 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -135,8 +135,7 @@ public:
/// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0
/// register, hence ADDR64.
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind=0) const override {
+ getPointerRegClass(unsigned Kind = 0) const override {
return &SystemZ::ADDR64BitRegClass;
}
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index f381b7d..99e1f61 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -93,8 +93,7 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
+VERegisterInfo::getPointerRegClass(unsigned Kind) const {
return &VE::I64RegClass;
}
diff --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h
index 3f6feed..999dc85 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/llvm/lib/Target/VE/VERegisterInfo.h
@@ -31,8 +31,7 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const override;
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override;
bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 2f36e26..27f7e1a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -278,7 +278,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc DL;
const TargetRegisterClass *PtrRC =
- MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ MRI.getTargetRegisterInfo()->getPointerRegClass();
unsigned SPReg = getSPReg(MF);
if (StackSize)
SPReg = MRI.createVirtualRegister(PtrRC);
@@ -349,7 +349,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
SPReg = FI->getBasePointerVreg();
} else if (StackSize) {
const TargetRegisterClass *PtrRC =
- MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ MRI.getTargetRegisterInfo()->getPointerRegClass();
Register OffsetReg = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
.addImm(StackSize);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 1eae358..23108e4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -28,6 +28,7 @@ HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(DOT)
HANDLE_NODETYPE(EXT_ADD_PAIRWISE_U)
+HANDLE_NODETYPE(EXT_ADD_PAIRWISE_S)
HANDLE_NODETYPE(SHUFFLE)
HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index fe100da..aea27ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -422,24 +422,30 @@ bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
return true;
EVT VT = EVT::getEVT(I->getType());
+ if (VT.getSizeInBits() > 128)
+ return true;
+
auto Op1 = I->getOperand(1);
if (auto *InputInst = dyn_cast<Instruction>(Op1)) {
- if (InstructionOpcodeToISD(InputInst->getOpcode()) != ISD::MUL)
- return true;
-
- if (isa<Instruction>(InputInst->getOperand(0)) &&
- isa<Instruction>(InputInst->getOperand(1))) {
- // dot only supports signed inputs but also support lowering unsigned.
- if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
- cast<Instruction>(InputInst->getOperand(1))->getOpcode())
- return true;
-
- EVT Op1VT = EVT::getEVT(Op1->getType());
- if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
- ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) ||
- (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
- return false;
+ unsigned Opcode = InstructionOpcodeToISD(InputInst->getOpcode());
+ if (Opcode == ISD::MUL) {
+ if (isa<Instruction>(InputInst->getOperand(0)) &&
+ isa<Instruction>(InputInst->getOperand(1))) {
+ // dot only supports signed inputs but also support lowering unsigned.
+ if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
+ cast<Instruction>(InputInst->getOperand(1))->getOpcode())
+ return true;
+
+ EVT Op1VT = EVT::getEVT(Op1->getType());
+ if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
+ ((VT.getVectorElementCount() * 2 ==
+ Op1VT.getVectorElementCount()) ||
+ (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
+ return false;
+ }
+ } else if (ISD::isExtOpcode(Opcode)) {
+ return false;
}
}
return true;
@@ -2117,77 +2123,93 @@ SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
SDLoc DL(N);
- SDValue Mul = N->getOperand(2);
- assert(Mul->getOpcode() == ISD::MUL && "expected mul input");
-
- SDValue ExtendLHS = Mul->getOperand(0);
- SDValue ExtendRHS = Mul->getOperand(1);
- assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
- ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
- "expected widening mul");
- assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
- "expected mul to use the same extend for both operands");
-
- SDValue ExtendInLHS = ExtendLHS->getOperand(0);
- SDValue ExtendInRHS = ExtendRHS->getOperand(0);
- bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
-
- if (ExtendInLHS->getValueType(0) == MVT::v8i16) {
- if (IsSigned) {
- // i32x4.dot_i16x8_s
- SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32,
- ExtendInLHS, ExtendInRHS);
- return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot);
- }
- unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
- unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
+ SDValue Input = N->getOperand(2);
+ if (Input->getOpcode() == ISD::MUL) {
+ SDValue ExtendLHS = Input->getOperand(0);
+ SDValue ExtendRHS = Input->getOperand(1);
+ assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
+ ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
+ "expected widening mul or add");
+ assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
+ "expected binop to use the same extend for both operands");
+
+ SDValue ExtendInLHS = ExtendLHS->getOperand(0);
+ SDValue ExtendInRHS = ExtendRHS->getOperand(0);
+ bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
+ unsigned LowOpc =
+ IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
+ unsigned HighOpc = IsSigned ? WebAssemblyISD::EXTEND_HIGH_S
+ : WebAssemblyISD::EXTEND_HIGH_U;
+ SDValue LowLHS;
+ SDValue LowRHS;
+ SDValue HighLHS;
+ SDValue HighRHS;
+
+ auto AssignInputs = [&](MVT VT) {
+ LowLHS = DAG.getNode(LowOpc, DL, VT, ExtendInLHS);
+ LowRHS = DAG.getNode(LowOpc, DL, VT, ExtendInRHS);
+ HighLHS = DAG.getNode(HighOpc, DL, VT, ExtendInLHS);
+ HighRHS = DAG.getNode(HighOpc, DL, VT, ExtendInRHS);
+ };
- // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
- SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInLHS);
- SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInRHS);
- SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInLHS);
- SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInRHS);
+ if (ExtendInLHS->getValueType(0) == MVT::v8i16) {
+ if (IsSigned) {
+ // i32x4.dot_i16x8_s
+ SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32,
+ ExtendInLHS, ExtendInRHS);
+ return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot);
+ }
- SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v4i32, LowLHS, LowRHS);
- SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v4i32, HighLHS, HighRHS);
- SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, MulLow, MulHigh);
- return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
+ // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
+ MVT VT = MVT::v4i32;
+ AssignInputs(VT);
+ SDValue MulLow = DAG.getNode(ISD::MUL, DL, VT, LowLHS, LowRHS);
+ SDValue MulHigh = DAG.getNode(ISD::MUL, DL, VT, HighLHS, HighRHS);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, MulLow, MulHigh);
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Add);
+ } else {
+ assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
+ "expected v16i8 input types");
+ AssignInputs(MVT::v8i16);
+ // Lower to a wider tree, using twice the operations compared to above.
+ if (IsSigned) {
+ // Use two dots
+ SDValue DotLHS =
+ DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS);
+ SDValue DotRHS =
+ DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS);
+ return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
+ }
+
+ SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
+ SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
+
+ SDValue AddLow = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
+ MVT::v4i32, MulLow);
+ SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
+ MVT::v4i32, MulHigh);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
+ return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
+ }
} else {
- assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
- "expected v16i8 input types");
- // Lower to a wider tree, using twice the operations compared to above.
- if (IsSigned) {
- // Use two dots
- unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S;
- unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S;
- SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
- SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
- SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
- SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
- SDValue DotLHS =
- DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS);
- SDValue DotRHS =
- DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS);
- SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS);
+ // Accumulate the input using extadd_pairwise.
+ assert(ISD::isExtOpcode(Input.getOpcode()) && "expected extend");
+ bool IsSigned = Input->getOpcode() == ISD::SIGN_EXTEND;
+ unsigned PairwiseOpc = IsSigned ? WebAssemblyISD::EXT_ADD_PAIRWISE_S
+ : WebAssemblyISD::EXT_ADD_PAIRWISE_U;
+ SDValue ExtendIn = Input->getOperand(0);
+ if (ExtendIn->getValueType(0) == MVT::v8i16) {
+ SDValue Add = DAG.getNode(PairwiseOpc, DL, MVT::v4i32, ExtendIn);
return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
}
- unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
- unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
- SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
- SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
- SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
- SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
-
- SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
- SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
-
- SDValue AddLow =
- DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulLow);
- SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
- MVT::v4i32, MulHigh);
- SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
+ assert(ExtendIn->getValueType(0) == MVT::v16i8 &&
+ "expected v16i8 input types");
+ SDValue Add =
+ DAG.getNode(PairwiseOpc, DL, MVT::v4i32,
+ DAG.getNode(PairwiseOpc, DL, MVT::v8i16, ExtendIn));
return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 3c26b45..d8948ad 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1454,12 +1454,13 @@ def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>;
// Extended pairwise addition
def extadd_pairwise_u : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_U", extend_t>;
+def extadd_pairwise_s : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_S", extend_t>;
-defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
+defm "" : SIMDConvert<I16x8, I8x16, extadd_pairwise_s,
"extadd_pairwise_i8x16_s", 0x7c>;
defm "" : SIMDConvert<I16x8, I8x16, extadd_pairwise_u,
"extadd_pairwise_i8x16_u", 0x7d>;
-defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
+defm "" : SIMDConvert<I32x4, I16x8, extadd_pairwise_s,
"extadd_pairwise_i16x8_s", 0x7e>;
defm "" : SIMDConvert<I32x4, I16x8, extadd_pairwise_u,
"extadd_pairwise_i16x8_u", 0x7f>;
@@ -1468,6 +1469,10 @@ def : Pat<(v4i32 (int_wasm_extadd_pairwise_unsigned (v8i16 V128:$in))),
(extadd_pairwise_u_I32x4 V128:$in)>;
def : Pat<(v8i16 (int_wasm_extadd_pairwise_unsigned (v16i8 V128:$in))),
(extadd_pairwise_u_I16x8 V128:$in)>;
+def : Pat<(v4i32 (int_wasm_extadd_pairwise_signed (v8i16 V128:$in))),
+ (extadd_pairwise_s_I32x4 V128:$in)>;
+def : Pat<(v8i16 (int_wasm_extadd_pairwise_signed (v16i8 V128:$in))),
+ (extadd_pairwise_s_I16x8 V128:$in)>;
// f64x2 <-> f32x4 conversions
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 18886ba..ebb5f55 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -117,7 +117,7 @@ bool WebAssemblyRegisterInfo::eliminateFrameIndex(
if (FrameOffset) {
// Create i32/64.add SP, offset and make it the operand.
const TargetRegisterClass *PtrRC =
- MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ MRI.getTargetRegisterInfo()->getPointerRegClass();
Register OffsetOp = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, *II, II->getDebugLoc(),
TII->get(WebAssemblyFrameLowering::getOpcConst(MF)),
@@ -149,10 +149,8 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
+WebAssemblyRegisterInfo::getPointerRegClass(unsigned Kind) const {
assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
- if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
- return &WebAssembly::I64RegClass;
- return &WebAssembly::I32RegClass;
+ return TT.getArch() == Triple::wasm64 ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index d875e4b..3a73ff6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -42,8 +42,7 @@ public:
Register getFrameRegister(const MachineFunction &MF) const override;
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
// This does not apply to wasm.
const uint32_t *getNoPreservedMask() const override { return nullptr; }
};
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 0eefd3e..92a9812 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -316,7 +316,13 @@ InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
if (CostKind != TTI::TCK_RecipThroughput)
return Invalid;
- InstructionCost Cost(TTI::TCC_Basic);
+ if (Opcode != Instruction::Add)
+ return Invalid;
+
+ EVT AccumEVT = EVT::getEVT(AccumType);
+ // TODO: Add i64 accumulator.
+ if (AccumEVT != MVT::i32)
+ return Invalid;
// Possible options:
// - i16x8.extadd_pairwise_i8x16_sx
@@ -324,23 +330,26 @@ InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
// - i32x4.dot_i16x8_s
// Only try to support dot, for now.
- if (Opcode != Instruction::Add)
+ EVT InputEVT = EVT::getEVT(InputTypeA);
+ if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
+ (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
return Invalid;
+ }
- if (!BinOp || *BinOp != Instruction::Mul)
+ if (OpAExtend == TTI::PR_None)
return Invalid;
- if (InputTypeA != InputTypeB)
- return Invalid;
+ InstructionCost Cost(TTI::TCC_Basic);
+ if (!BinOp)
+ return Cost;
if (OpAExtend != OpBExtend)
return Invalid;
- EVT InputEVT = EVT::getEVT(InputTypeA);
- EVT AccumEVT = EVT::getEVT(AccumType);
+ if (*BinOp != Instruction::Mul)
+ return Invalid;
- // TODO: Add i64 accumulator.
- if (AccumEVT != MVT::i32)
+ if (InputTypeA != InputTypeB)
return Invalid;
// Signed inputs can lower to dot
diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 3ac7c82..d2e3527 100644
--- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -388,7 +388,7 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
MachineMemOperand *SMMO = *StoreInst->memoperands_begin();
Register Reg1 = MRI->createVirtualRegister(
- TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent())));
+ TII->getRegClass(TII->get(NLoadOpcode), 0, TRI));
MachineInstr *NewLoad =
BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode),
Reg1)
@@ -553,8 +553,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
}
unsigned X86AvoidSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) {
- const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI,
- *LoadInst->getParent()->getParent());
+ const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI);
return TRI->getRegSizeInBits(*TRC) / 8;
}
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 93e55ca..339e2f3 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -175,8 +175,7 @@ public:
const DebugLoc &DL = MI->getDebugLoc();
Register Reg = MRI->createVirtualRegister(
- TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
- *MBB->getParent()));
+ TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo()));
MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
Bld.add(MO);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index f109e29..58d5262 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -94,9 +94,8 @@ X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
const TargetRegisterClass *
X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const {
- auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
+ const TargetRegisterInfo *TRI) const {
+ auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI);
// If the target does not have egpr, then r16-r31 will be resereved for all
// instructions.
if (!RC || !Subtarget.hasEGPR())
@@ -7249,8 +7248,8 @@ static void updateOperandRegConstraints(MachineFunction &MF,
if (!Reg.isVirtual())
continue;
- auto *NewRC = MRI.constrainRegClass(
- Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
+ auto *NewRC =
+ MRI.constrainRegClass(Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI));
if (!NewRC) {
LLVM_DEBUG(
dbgs() << "WARNING: Unable to update register constraint for operand "
@@ -7348,7 +7347,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
unsigned SrcIdx = (Imm >> 6) & 3;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
(MI.getOpcode() != X86::INSERTPSrri || Alignment >= Align(4))) {
@@ -7373,7 +7372,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
// TODO: In most cases AVX doesn't have a 8-byte alignment requirement.
if (OpNum == 2) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
unsigned NewOpCode =
@@ -7392,7 +7391,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
// table twice.
if (OpNum == 2) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
MachineInstr *NewMI =
@@ -7527,7 +7526,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
bool NarrowToMOV32rm = false;
if (Size) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -8495,7 +8494,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
const MCInstrDesc &MCID = get(Opc);
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
@@ -8606,7 +8605,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the store instruction.
if (UnfoldStore) {
- const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
@@ -8638,7 +8637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
const MCInstrDesc &MCID = get(Opc);
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
unsigned NumDefs = MCID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
@@ -8689,7 +8688,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = nullptr;
if (MCID.getNumDefs() > 0) {
- DstRC = getRegClass(MCID, 0, &RI, MF);
+ DstRC = getRegClass(MCID, 0, &RI);
VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index f087b7f..86133b3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -248,8 +248,7 @@ public:
/// GR*_NOREX2RegClass (Returned register class)
const TargetRegisterClass *
getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const override;
+ const TargetRegisterInfo *TRI) const override;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 8dcd4b8..167bed1 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -339,7 +339,6 @@ int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
bool X86OptimizeLEAPass::chooseBestLEA(
const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI,
MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) {
- const MachineFunction *MF = MI.getParent()->getParent();
const MCInstrDesc &Desc = MI.getDesc();
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
X86II::getOperandBias(Desc);
@@ -360,7 +359,7 @@ bool X86OptimizeLEAPass::chooseBestLEA(
// example MOV8mr_NOREX. We could constrain the register class of the LEA
// def to suit MI, however since this case is very rare and hard to
// reproduce in a test it's just more reliable to skip the LEA.
- if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
+ if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI) !=
MRI->getRegClass(DefMI->getOperand(0).getReg()))
continue;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 7963dc1..c47bb3e6 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -61,6 +61,7 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT)
// Cache some information.
Is64Bit = TT.isArch64Bit();
+ IsTarget64BitLP64 = Is64Bit && !TT.isX32();
IsWin64 = Is64Bit && TT.isOSWindows();
IsUEFI64 = Is64Bit && TT.isUEFI();
@@ -192,13 +193,11 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
const TargetRegisterClass *
-X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
- const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
- if (Subtarget.isTarget64BitLP64())
+ if (IsTarget64BitLP64)
return &X86::GR64RegClass;
// If the target is 64bit but we have been told to use 32bit addresses,
// we can still use 64-bit register as long as we know the high bits
@@ -206,16 +205,16 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
// Reflect that in the returned register class.
return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass;
case 1: // Normal GPRs except the stack pointer (for encoding reasons).
- if (Subtarget.isTarget64BitLP64())
+ if (IsTarget64BitLP64)
return &X86::GR64_NOSPRegClass;
// NOSP does not contain RIP, so no special case here.
return &X86::GR32_NOSPRegClass;
case 2: // NOREX GPRs.
- if (Subtarget.isTarget64BitLP64())
+ if (IsTarget64BitLP64)
return &X86::GR64_NOREXRegClass;
return &X86::GR32_NOREXRegClass;
case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
- if (Subtarget.isTarget64BitLP64())
+ if (IsTarget64BitLP64)
return &X86::GR64_NOREX_NOSPRegClass;
// NOSP does not contain RIP, so no special case here.
return &X86::GR32_NOREX_NOSPRegClass;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index d022e5a..e646591 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -28,6 +28,9 @@ private:
///
bool Is64Bit;
+ /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+ bool IsTarget64BitLP64;
+
/// IsWin64 - Is the target on of win64 flavours
///
bool IsWin64;
@@ -78,8 +81,7 @@ public:
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
+ getPointerRegClass(unsigned Kind = 0) const override;
/// getCrossCopyRegClass - Returns a legal register class to copy a register
/// in the specified class to or from. Returns NULL if it is possible to copy
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index c28de14..e0b3b61 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -836,13 +836,12 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
/// a way to unfold into a newly created vreg rather than requiring a register
/// input.
static const TargetRegisterClass *
-getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
- unsigned Opcode) {
+getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode) {
unsigned Index;
unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
- return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
+ return TII.getRegClass(MCID, Index, &TII.getRegisterInfo());
}
void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
@@ -898,7 +897,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
// Use the generic unfold logic now that we know we're dealing with
// expected instructions.
// FIXME: We don't have test coverage for all of these!
- auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
+ auto *UnfoldedRC = getRegClassForUnfoldedLoad(*TII, MI.getOpcode());
if (!UnfoldedRC) {
LLVM_DEBUG(dbgs()
<< "ERROR: Unable to unfold load from instruction:\n";
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 726d09a..00951fd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -3155,16 +3155,6 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
Value *X, *Y;
Constant *C;
- // B = fsub A, 0.0
- // Z = Op B
- // can be transformed into
- // Z = Op A
- // Where Op is such that we can ignore sign of 0 in fsub
- Value *A;
- if (match(&I, m_OneUse(m_FSub(m_Value(A), m_AnyZeroFP()))) &&
- canIgnoreSignBitOfZero(*I.use_begin()))
- return replaceInstUsesWith(I, A);
-
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
// Canonicalize to fadd to make analysis easier.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 33b66ae..17cf415 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3385,12 +3385,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// TODO: apply range metadata for range check patterns?
}
- // Separate storage assumptions apply to the underlying allocations, not any
- // particular pointer within them. When evaluating the hints for AA purposes
- // we getUnderlyingObject them; by precomputing the answers here we can
- // avoid having to do so repeatedly there.
for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
OperandBundleUse OBU = II->getOperandBundleAt(Idx);
+
+ // Separate storage assumptions apply to the underlying allocations, not
+ // any particular pointer within them. When evaluating the hints for AA
+ // purposes we getUnderlyingObject them; by precomputing the answers here
+ // we can avoid having to do so repeatedly there.
if (OBU.getTagName() == "separate_storage") {
assert(OBU.Inputs.size() == 2);
auto MaybeSimplifyHint = [&](const Use &U) {
@@ -3404,6 +3405,32 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
MaybeSimplifyHint(OBU.Inputs[0]);
MaybeSimplifyHint(OBU.Inputs[1]);
}
+
+ // Try to remove redundant alignment assumptions.
+ if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
+ RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
+ *cast<AssumeInst>(II), II->arg_size() + Idx);
+ if (!RK || RK.AttrKind != Attribute::Alignment ||
+ !isPowerOf2_64(RK.ArgValue) || !isa<ConstantInt>(RK.IRArgValue))
+ continue;
+
+ // Don't try to remove align assumptions for pointers derived from
+ // arguments. We might lose information if the function gets inline and
+ // the align argument attribute disappears.
+ Value *UO = getUnderlyingObject(RK.WasOn);
+ if (!UO || isa<Argument>(UO))
+ continue;
+
+ // Compute known bits for the pointer, passing nullptr as context to
+ // avoid computeKnownBits using the assumption we are about to remove
+ // for reasoning.
+ KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
+ unsigned TZ = std::min(Known.countMinTrailingZeros(),
+ Value::MaxAlignmentExponent);
+ if ((1ULL << TZ) < RK.ArgValue)
+ continue;
+ return CallBase::removeOperandBundle(II, OBU.getTagID());
+ }
}
// Convert nonnull assume like:
@@ -3925,6 +3952,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::get_active_lane_mask: {
+ const APInt *Op0, *Op1;
+ if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
+ match(II->getOperand(1), m_APInt(Op1))) {
+ Type *OpTy = II->getOperand(0)->getType();
+ return replaceInstUsesWith(
+ *II, Builder.CreateIntrinsic(
+ II->getType(), Intrinsic::get_active_lane_mask,
+ {Constant::getNullValue(OpTy),
+ ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
+ }
+ break;
+ }
default: {
// Handle target specific intrinsics
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ccf918f..9ca8194 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -977,8 +977,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
- Value *Icmp = Builder.CreateICmpEQ(V1, Right);
- return replaceInstUsesWith(Trunc, Icmp);
+ return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
}
// OP = { lshr, ashr }
@@ -986,8 +985,15 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// power of 2
if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
- Value *Icmp = Builder.CreateICmpULT(V1, Right);
- return replaceInstUsesWith(Trunc, Icmp);
+ return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
+ }
+
+ // OP = { lshr, ashr }
+ // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
+ // negative power of 2
+ if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
+ Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
+ return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
}
return Changed ? &Trunc : nullptr;
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 40104e8..092a0fb 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1705,10 +1705,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// time in isGuaranteedToExecute if we don't actually have anything to
// drop. It is a compile time optimization, not required for correctness.
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) {
- if (ProfcheckDisableMetadataFixes)
- I.dropUBImplyingAttrsAndMetadata();
- else
- I.dropUBImplyingAttrsAndMetadata({LLVMContext::MD_prof});
+ I.dropUBImplyingAttrsAndMetadata();
}
if (isa<PHINode>(I))
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 2cfd70a..57dc1b3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3342,8 +3342,11 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
// retain their original debug locations (DILocations) and debug intrinsic
// instructions.
//
- // Doing so would degrade the debugging experience and adversely affect the
- // accuracy of profiling information.
+ // Doing so would degrade the debugging experience.
+ //
+ // FIXME: Issue #152767: debug info should also be the same as the
+ // original branch, **if** the user explicitly indicated that (for sampling
+ // PGO)
//
// Currently, when hoisting the instructions, we take the following actions:
// - Remove their debug intrinsic instructions.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 850e57e..5a842f9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -332,6 +332,17 @@ public:
}
};
+// we synthesize a || b as select a, true, b
+// we synthesize a && b as select a, b, false
+// this function determines if SI is playing one of those roles.
+[[maybe_unused]] bool
+isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
+ return ((isa<ConstantInt>(SI->getTrueValue()) &&
+ (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
+ (isa<ConstantInt>(SI->getFalseValue()) &&
+ (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
+}
+
} // end anonymous namespace
/// Return true if all the PHI nodes in the basic block \p BB
@@ -4033,6 +4044,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
// Try to update branch weights.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ SmallVector<uint32_t, 2> MDWeights;
if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
SuccTrueWeight, SuccFalseWeight)) {
SmallVector<uint64_t, 8> NewWeights;
@@ -4063,7 +4075,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
// Halve the weights if any of them cannot fit in an uint32_t
fitWeights(NewWeights);
- SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
+ append_range(MDWeights, NewWeights);
setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
// TODO: If BB is reachable from all paths through PredBlock, then we
@@ -4100,6 +4112,13 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
Value *BICond = VMap[BI->getCondition()];
PBI->setCondition(
createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
+ if (!ProfcheckDisableMetadataFixes)
+ if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
+ if (!MDWeights.empty()) {
+ assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
+ setBranchWeights(SI, MDWeights[0], MDWeights[1],
+ /*IsExpected=*/false);
+ }
++NumFoldBranchToCommonDest;
return true;
@@ -4812,6 +4831,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
fitWeights(NewWeights);
setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
+ // Cond may be a select instruction with the first operand set to "true", or
+ // the second to "false" (see how createLogicalOp works for `and` and `or`)
+ if (!ProfcheckDisableMetadataFixes)
+ if (auto *SI = dyn_cast<SelectInst>(Cond)) {
+ assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
+ // The select is predicated on PBICond
+ assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
+ // The corresponding probabilities are what was referred to above as
+ // PredCommon and PredOther.
+ setBranchWeights(SI, PredCommon, PredOther,
+ /*IsExpected=*/false);
+ }
}
// OtherDest may have phi nodes. If so, add an entry from PBI's
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 85a6fca..ff35db1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1903,11 +1903,12 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
SafetyInfo.computeLoopSafetyInfo(TheLoop);
// We need to know that load will be executed before we can hoist a
// copy out to run just before the first iteration.
- // FIXME: Currently, other restrictions prevent us from reaching this point
- // with a loop where the uncountable exit condition is determined
- // by a conditional load.
- assert(SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop) &&
- "Unhandled control flow in uncountable exit loop with side effects");
+ if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+ reportVectorizationFailure(
+ "Load for uncountable exit not guaranteed to execute",
+ "ConditionalUncountableExitLoad", ORE, TheLoop);
+ return false;
+ }
// Prohibit any potential aliasing with any instruction in the loop which
// might store to memory.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b4acda8..c04b5cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -393,6 +393,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool> ConsiderRegPressure(
+ "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
+ cl::desc("Discard VFs if their register pressure is too high."));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -3693,6 +3697,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
ElementCount VF) {
+ if (ConsiderRegPressure.getNumOccurrences())
+ return ConsiderRegPressure;
+
+ // TODO: We should eventually consider register pressure for all targets. The
+ // TTI hook is temporary whilst target-specific issues are being fixed.
+ if (TTI.shouldConsiderVectorizationRegPressure())
+ return true;
+
if (!useMaxBandwidth(VF.isScalable()
? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector))
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 16b1b53..e324462 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -343,37 +343,21 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
- auto *LastInst = cast<Instruction>(get(Def, LastLane));
+ // We need to construct the vector value for a single-scalar value by
+ // broadcasting the scalar to all lanes.
+ // TODO: Replace by introducing Broadcast VPInstructions.
+ assert(IsSingleScalar && "must be a single-scalar at this point");
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
auto OldIP = Builder.saveIP();
+ auto *LastInst = cast<Instruction>(get(Def, LastLane));
auto NewIP = isa<PHINode>(LastInst)
? LastInst->getParent()->getFirstNonPHIIt()
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);
-
- // However, if we are vectorizing, we need to construct the vector values.
- // If the value is known to be uniform after vectorization, we can just
- // broadcast the scalar value corresponding to lane zero. Otherwise, we
- // construct the vector values using insertelement instructions. Since the
- // resulting vectors are stored in State, we will only generate the
- // insertelements once.
- Value *VectorValue = nullptr;
- if (IsSingleScalar) {
- VectorValue = GetBroadcastInstrs(ScalarValue);
- set(Def, VectorValue);
- } else {
- assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- assert(isa<VPInstruction>(Def) &&
- "Explicit BuildVector recipes must have"
- "handled packing for non-VPInstructions.");
- // Initialize packing with insertelements to start from poison.
- VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
- for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
- VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
- set(Def, VectorValue);
- }
+ Value *VectorValue = GetBroadcastInstrs(ScalarValue);
+ set(Def, VectorValue);
Builder.restoreIP(OldIP);
return VectorValue;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 53291a9..997a45b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -908,6 +908,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
+ virtual VPRecipeWithIRFlags *clone() override = 0;
+
static inline bool classof(const VPSingleDefRecipe *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
@@ -1061,13 +1063,6 @@ public:
VScale,
};
-private:
- typedef unsigned char OpcodeTy;
- OpcodeTy Opcode;
-
- /// An optional name that can be used for the generated IR instruction.
- const std::string Name;
-
/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
@@ -1076,6 +1071,13 @@ private:
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;
+private:
+ typedef unsigned char OpcodeTy;
+ OpcodeTy Opcode;
+
+ /// An optional name that can be used for the generated IR instruction.
+ const std::string Name;
+
/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;
@@ -1085,11 +1087,6 @@ private:
/// existing value is returned rather than a generated one.
Value *generate(VPTransformState &State);
- /// Utility methods serving execute(): generates a scalar single instance of
- /// the modeled instruction for a given lane. \returns the scalar generated
- /// value for lane \p Lane.
- Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
-
#if !defined(NDEBUG)
/// Return the number of operands determined by the opcode of the
/// VPInstruction. Returns -1u if the number of operands cannot be determined
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bf51489..11846f8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -564,16 +564,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
}
}
-Value *VPInstruction::generatePerLane(VPTransformState &State,
- const VPLane &Lane) {
- IRBuilderBase &Builder = State.Builder;
-
- assert(getOpcode() == VPInstruction::PtrAdd &&
- "only PtrAdd opcodes are supported for now");
- return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
- State.get(getOperand(1), Lane), Name);
-}
-
/// Create a conditional branch using \p Cond branching to the successors of \p
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
/// while the second successor may already have been created (if it is a header
@@ -1197,24 +1187,13 @@ void VPInstruction::execute(VPTransformState &State) {
"Set flags not supported for the provided opcode");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
- bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
- (vputils::onlyFirstLaneUsed(this) ||
- isVectorToScalar() || isSingleScalar());
- bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
- if (GeneratesPerAllLanes) {
- for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
- Lane != NumLanes; ++Lane) {
- Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
- assert(GeneratedValue && "generatePerLane must produce a value");
- State.set(this, GeneratedValue, VPLane(Lane));
- }
- return;
- }
-
Value *GeneratedValue = generate(State);
if (!hasResult())
return;
assert(GeneratedValue && "generate must produce a value");
+ bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
+ (vputils::onlyFirstLaneUsed(this) ||
+ isVectorToScalar() || isSingleScalar());
assert((((GeneratedValue->getType()->isVectorTy() ||
GeneratedValue->getType()->isStructTy()) ==
!GeneratesPerFirstLaneOnly) ||
@@ -1287,6 +1266,12 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::Broadcast:
case VPInstruction::ReductionStartVector:
return true;
+ case VPInstruction::BuildStructVector:
+ case VPInstruction::BuildVector:
+ // Before replicating by VF, Build(Struct)Vector uses all lanes of the
+ // operand, after replicating its operands only the first lane is used.
+ // Before replicating, it will have only a single operand.
+ return getNumOperands() > 1;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2cac555..fcd85ba 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3695,34 +3695,39 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
- // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
- // excluding ones in replicate regions. Those are not materialized explicitly
- // yet. Those vector users are still handled in VPReplicateRegion::execute(),
- // via shouldPack().
+ // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
+ // VPInstructions, excluding ones in replicate regions. Those are not
+ // materialized explicitly yet. Those vector users are still handled in
+ // VPReplicateRegion::execute(), via shouldPack().
// TODO: materialize build vectors for replicating recipes in replicating
// regions.
- // TODO: materialize build vectors for VPInstructions.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
+ if (!isa<VPReplicateRecipe, VPInstruction>(&R))
+ continue;
+ auto *DefR = cast<VPRecipeWithIRFlags>(&R);
+ auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
- return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
+ return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
- if (!RepR || RepR->isSingleScalar() ||
- none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
+ if ((isa<VPReplicateRecipe>(DefR) &&
+ cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
+ (isa<VPInstruction>(DefR) &&
+ (vputils::onlyFirstLaneUsed(DefR) ||
+ !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes())) ||
+ none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
continue;
- Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+ Type *ScalarTy = TypeInfo.inferScalarType(DefR);
unsigned Opcode = ScalarTy->isStructTy()
? VPInstruction::BuildStructVector
: VPInstruction::BuildVector;
- auto *BuildVector = new VPInstruction(Opcode, {RepR});
- BuildVector->insertAfter(RepR);
+ auto *BuildVector = new VPInstruction(Opcode, {DefR});
+ BuildVector->insertAfter(DefR);
- RepR->replaceUsesWithIf(
+ DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
VPUser &U, unsigned) {
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 1957428..69452a7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -158,10 +158,10 @@ struct VPlanTransforms {
/// Explicitly unroll \p Plan by \p UF.
static void unrollByUF(VPlan &Plan, unsigned UF);
- /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
- /// with \p VF single-scalar recipes.
- /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
- /// dissolving the latter.
+ /// Replace each replicating VPReplicateRecipe and VPInstruction outside of
+ /// any replicate region in \p Plan with \p VF single-scalar recipes.
+ /// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside
+ /// replicate regions, thereby dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 443df16..ce59494 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
VPlanTransforms::removeDeadRecipes(Plan);
}
-/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
-/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
-static VPReplicateRecipe *
+/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
+/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
+/// definitions for operands of \DefR.
+static VPRecipeWithIRFlags *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
- VPReplicateRecipe *RepR, VPLane Lane,
+ VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
- for (VPValue *Op : RepR->operands()) {
+ for (VPValue *Op : DefR->operands()) {
// If Op is a definition that has been unrolled, directly use the clone for
// the corresponding lane.
auto LaneDefs = Def2LaneDefs.find(Op);
@@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
NewOps.push_back(Ext);
}
- auto *New =
- new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
- /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
- New->transferFlags(*RepR);
- New->insertBefore(RepR);
+ VPRecipeWithIRFlags *New;
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
+ // TODO: have cloning of replicate recipes also provide the desired result
+ // coupled with setting its operands to NewOps (deriving IsSingleScalar and
+ // Mask from the operands?)
+ New =
+ new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+ /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+ } else {
+ assert(isa<VPInstruction>(DefR) &&
+ "DefR must be a VPReplicateRecipe or VPInstruction");
+ New = DefR->clone();
+ for (const auto &[Idx, Op] : enumerate(NewOps)) {
+ New->setOperand(Idx, Op);
+ }
+ }
+ New->transferFlags(*DefR);
+ New->insertBefore(DefR);
return New;
}
@@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->isSingleScalar())
+ if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
+ (isa<VPReplicateRecipe>(&R) &&
+ cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
+ (isa<VPInstruction>(&R) &&
+ !cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
continue;
- VPBuilder Builder(RepR);
- if (RepR->getNumUsers() == 0) {
- // Create single-scalar version of RepR for all lanes.
+ auto *DefR = cast<VPRecipeWithIRFlags>(&R);
+ VPBuilder Builder(DefR);
+ if (DefR->getNumUsers() == 0) {
+ // Create single-scalar version of DefR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
- RepR->eraseFromParent();
+ cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
+ DefR->eraseFromParent();
continue;
}
- /// Create single-scalar version of RepR for all lanes.
+ /// Create single-scalar version of DefR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
+ cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
- Def2LaneDefs[RepR] = LaneDefs;
+ Def2LaneDefs[DefR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
- RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
- return U.onlyFirstLaneUsed(RepR);
+ DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
+ return U.onlyFirstLaneUsed(DefR);
});
- // Update each build vector user that currently has RepR as its only
+ // Update each build vector user that currently has DefR as its only
// operand, to have all LaneDefs as its operands.
- for (VPUser *U : to_vector(RepR->users())) {
+ for (VPUser *U : to_vector(DefR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
@@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
for (VPValue *LaneDef : drop_begin(LaneDefs))
VPI->addOperand(LaneDef);
}
- ToRemove.push_back(RepR);
+ ToRemove.push_back(DefR);
}
}
for (auto *R : reverse(ToRemove))
diff --git a/llvm/test/Analysis/BasicAA/featuretest.ll b/llvm/test/Analysis/BasicAA/featuretest.ll
index e4cb009..04c4725 100644
--- a/llvm/test/Analysis/BasicAA/featuretest.ll
+++ b/llvm/test/Analysis/BasicAA/featuretest.ll
@@ -15,24 +15,14 @@ declare void @llvm.assume(i1)
; operations on another array. Important for scientific codes.
;
define i32 @different_array_test(i64 %A, i64 %B) {
-; NO_ASSUME-LABEL: @different_array_test(
-; NO_ASSUME-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4
-; NO_ASSUME-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4
-; NO_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAY11]], i32 4) ]
-; NO_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY11]])
-; NO_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY22]])
-; NO_ASSUME-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]]
-; NO_ASSUME-NEXT: store i32 7, ptr [[POINTER2]], align 4
-; NO_ASSUME-NEXT: ret i32 0
-;
-; USE_ASSUME-LABEL: @different_array_test(
-; USE_ASSUME-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4
-; USE_ASSUME-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4
-; USE_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY11]])
-; USE_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY22]])
-; USE_ASSUME-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]]
-; USE_ASSUME-NEXT: store i32 7, ptr [[POINTER2]], align 4
-; USE_ASSUME-NEXT: ret i32 0
+; CHECK-LABEL: @different_array_test(
+; CHECK-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4
+; CHECK-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4
+; CHECK-NEXT: call void @external(ptr nonnull [[ARRAY11]])
+; CHECK-NEXT: call void @external(ptr nonnull [[ARRAY22]])
+; CHECK-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]]
+; CHECK-NEXT: store i32 7, ptr [[POINTER2]], align 4
+; CHECK-NEXT: ret i32 0
;
%Array1 = alloca i32, i32 100
%Array2 = alloca i32, i32 200
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll
index 28035b0..564ce6b 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll
@@ -364,3 +364,29 @@ body:
exit:
ret void
}
+
+define void @hang_due_to_unreachable_phi_inblock() personality ptr null {
+bb:
+ br label %bb6
+
+self-loop: ; preds = %self-loop
+ %dead = invoke ptr null()
+ to label %self-loop unwind label %bb4
+
+bb4: ; preds = %self-loop
+ %i5 = landingpad { ptr, i32 }
+ cleanup
+ br label %bb6
+
+bb6: ; preds = %bb4, %bb
+ %i7 = phi ptr [ null, %bb4 ], [ null, %bb ]
+ br label %bb8
+
+bb8: ; preds = %bb8, %bb6
+ %i9 = phi ptr [ null, %bb8 ], [ null, %bb6 ]
+ %i11 = icmp eq ptr %i9, null
+ br i1 %i11, label %bb12, label %bb8
+
+bb12: ; preds = %bb8, %bb6
+ ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
index 1d34706..8dd8ec4 100644
--- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
@@ -21,7 +21,7 @@ define void @udiv4_and_udiv2(i1 %c, ptr %A) {
; CHECK-NEXT: %gep.8 = getelementptr i8, ptr %A, i64 %iv
; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 4) + %A),+,1}<%loop> U: full-set S: full-set Exits: (((zext i32 %start to i64) /u 2) + %A) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep.16 = getelementptr i16, ptr %A, i64 %iv
-; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 2) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((2 * ((zext i32 %start to i64) /u 4))<nuw><nsw> + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep.32 = getelementptr i32, ptr %A, i64 %iv
; CHECK-NEXT: --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 %start to i64))<nuw><nsw> + %A) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
index 50ea191..5f04f12 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -aa-pipeline=tbaa,basic-aa -passes=dse -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; DSE should make use of TBAA.
define i8 @test0_yes(ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: define i8 @test0_yes
-; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-LABEL: define i8 @test0_yes(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[BAR_TBAA0:![0-9]+]]
+; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[FOO_TBAA3:![0-9]+]]
; CHECK-NEXT: ret i8 [[Y]]
;
store i8 0, ptr %a, !tbaa !1
@@ -18,11 +18,11 @@ define i8 @test0_yes(ptr %a, ptr %b) nounwind {
}
define i8 @test0_no(ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: define i8 @test0_no
-; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: store i8 0, ptr [[A]], align 1, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA5:![0-9]+]]
-; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[TBAA3]]
+; CHECK-LABEL: define i8 @test0_no(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store i8 0, ptr [[A]], align 1, !tbaa [[FOO_TBAA3]]
+; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[BAR_TBAA5:![0-9]+]]
+; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[FOO_TBAA3]]
; CHECK-NEXT: ret i8 [[Y]]
;
store i8 0, ptr %a, !tbaa !3
@@ -32,9 +32,9 @@ define i8 @test0_no(ptr %a, ptr %b) nounwind {
}
define i8 @test1_yes(ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: define i8 @test1_yes
-; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-LABEL: define i8 @test1_yes(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[QUX_TBAA8:![0-9]+]]
; CHECK-NEXT: store i8 1, ptr [[A]], align 1
; CHECK-NEXT: ret i8 [[Y]]
;
@@ -45,10 +45,10 @@ define i8 @test1_yes(ptr %a, ptr %b) nounwind {
}
define i8 @test1_no(ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: define i8 @test1_no
-; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-LABEL: define i8 @test1_no(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: store i8 0, ptr [[A]], align 1
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[QUX_TBAA10:![0-9]+]]
; CHECK-NEXT: store i8 1, ptr [[A]], align 1
; CHECK-NEXT: ret i8 [[Y]]
;
@@ -80,3 +80,16 @@ define i8 @test1_no(ptr %a, ptr %b) nounwind {
!10 = !{ !"bar", !12}
!11 = !{ !"qux", !0}
!12 = !{!"different"}
+;.
+; CHECK: [[BAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"bar", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{}
+; CHECK: [[FOO_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"foo", [[META2]]}
+; CHECK: [[BAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"bar", [[META7:![0-9]+]]}
+; CHECK: [[META7]] = !{!"different"}
+; CHECK: [[QUX_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0, i1 true}
+; CHECK: [[META9]] = !{!"qux", [[META2]]}
+; CHECK: [[QUX_TBAA10]] = !{[[META9]], [[META9]], i64 0, i1 false}
+;.
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index d896a1b..685c015 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMDEP
; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMSSA
@@ -11,8 +11,8 @@ define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind {
; CHECK-MEMDEP-LABEL: define void @yes(
; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
-; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0:![0-9]+]]
+; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]]
; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
; CHECK-MEMDEP: [[IF_THEN]]:
; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4
@@ -23,11 +23,11 @@ define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind {
; CHECK-MEMSSA-LABEL: define void @yes(
; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
-; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0:![0-9]+]]
+; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]]
; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
; CHECK-MEMSSA: [[IF_THEN]]:
-; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[RED_TBAA0]]
; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4
; CHECK-MEMSSA-NEXT: ret void
; CHECK-MEMSSA: [[IF_ELSE]]:
@@ -56,15 +56,15 @@ define void @watch_out_for_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind
; CHECK-LABEL: define void @watch_out_for_type_change(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]]
; CHECK-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5:![0-9]+]]
; CHECK-NEXT: store i32 [[T]], ptr [[Q]], align 4
; CHECK-NEXT: ret void
; CHECK: [[IF_ELSE]]:
-; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[BRICK_RED_TBAA8:![0-9]+]]
; CHECK-NEXT: store i32 [[U]], ptr [[Q]], align 4
; CHECK-NEXT: ret void
;
@@ -91,29 +91,29 @@ define void @watch_out_for_another_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) n
; CHECK-MEMDEP-LABEL: define void @watch_out_for_another_type_change(
; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
-; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]]
-; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0]]
+; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3]]
; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
; CHECK-MEMDEP: [[IF_THEN]]:
; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4
; CHECK-MEMDEP-NEXT: ret void
; CHECK-MEMDEP: [[IF_ELSE]]:
-; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]]
+; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5]]
; CHECK-MEMDEP-NEXT: store i32 [[U]], ptr [[Q]], align 4
; CHECK-MEMDEP-NEXT: ret void
;
; CHECK-MEMSSA-LABEL: define void @watch_out_for_another_type_change(
; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
-; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]]
-; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]]
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0]]
+; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3]]
; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
; CHECK-MEMSSA: [[IF_THEN]]:
-; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8]]
+; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[BRICK_RED_TBAA8]]
; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4
; CHECK-MEMSSA-NEXT: ret void
; CHECK-MEMSSA: [[IF_ELSE]]:
-; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]]
+; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5]]
; CHECK-MEMSSA-NEXT: store i32 [[U]], ptr [[Q]], align 4
; CHECK-MEMSSA-NEXT: ret void
;
@@ -144,25 +144,25 @@ if.else:
!8 = !{!"brick red", !5}
!9 = !{!"observable universe"}
;.
-; CHECK-MEMDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-MEMDEP: [[RED_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK-MEMDEP: [[META1]] = !{!"red", [[META2:![0-9]+]]}
; CHECK-MEMDEP: [[META2]] = !{}
-; CHECK-MEMDEP: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK-MEMDEP: [[BLU_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
; CHECK-MEMDEP: [[META4]] = !{!"blu", [[META2]]}
-; CHECK-MEMDEP: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK-MEMDEP: [[OUTER_SPACE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK-MEMDEP: [[META6]] = !{!"outer space", [[META7:![0-9]+]]}
; CHECK-MEMDEP: [[META7]] = !{!"observable universe"}
-; CHECK-MEMDEP: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK-MEMDEP: [[BRICK_RED_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
; CHECK-MEMDEP: [[META9]] = !{!"brick red", [[META1]]}
;.
-; CHECK-MEMSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-MEMSSA: [[RED_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK-MEMSSA: [[META1]] = !{!"red", [[META2:![0-9]+]]}
; CHECK-MEMSSA: [[META2]] = !{}
-; CHECK-MEMSSA: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK-MEMSSA: [[BLU_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
; CHECK-MEMSSA: [[META4]] = !{!"blu", [[META2]]}
-; CHECK-MEMSSA: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK-MEMSSA: [[OUTER_SPACE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK-MEMSSA: [[META6]] = !{!"outer space", [[META7:![0-9]+]]}
; CHECK-MEMSSA: [[META7]] = !{!"observable universe"}
-; CHECK-MEMSSA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK-MEMSSA: [[BRICK_RED_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
; CHECK-MEMSSA: [[META9]] = !{!"brick red", [[META1]]}
;.
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
index 47dd886..f605b51 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -aa-pipeline=tbaa,basic-aa -passes=memcpyopt,instcombine < %s | FileCheck %s
target datalayout = "e-p:64:64:64"
@@ -7,10 +7,12 @@ target datalayout = "e-p:64:64:64"
; it has a TBAA tag which declares that it is unrelated.
define void @foo(ptr nocapture %p, ptr nocapture %q, ptr nocapture %s) nounwind {
-; CHECK: @foo
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) %p, ptr noundef nonnull align 1 dereferenceable(16) %q, i64 16, i1 false), !tbaa !0
-; CHECK-NEXT: store i8 2, ptr %s, align 1, !tbaa [[TAGA:!.*]]
-; CHECK-NEXT: ret void
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], ptr captures(none) [[Q:%.*]], ptr captures(none) [[S:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[P]], ptr noundef nonnull align 1 dereferenceable(16) [[Q]], i64 16, i1 false), !tbaa [[B_TBAA0:![0-9]+]]
+; CHECK-NEXT: store i8 2, ptr [[S]], align 1, !tbaa [[A_TBAA3:![0-9]+]]
+; CHECK-NEXT: ret void
+;
tail call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 16, i1 false), !tbaa !2
store i8 2, ptr %s, align 1, !tbaa !1
tail call void @llvm.memcpy.p0.p0.i64(ptr %q, ptr %p, i64 16, i1 false), !tbaa !2
@@ -19,10 +21,15 @@ define void @foo(ptr nocapture %p, ptr nocapture %q, ptr nocapture %s) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
-; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0}
-; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
!0 = !{!"tbaa root"}
!1 = !{!3, !3, i64 0}
!2 = !{!4, !4, i64 0}
!3 = !{!"A", !0}
!4 = !{!"B", !0}
+;.
+; CHECK: [[B_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"B", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"tbaa root"}
+; CHECK: [[A_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"A", [[META2]]}
+;.
diff --git a/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll b/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll
index 0bcdfed..a4667ab 100644
--- a/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll
+++ b/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll
@@ -1,9 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S < %s | FileCheck %s
define <4 x i32> @load(ptr nocapture readonly %a0) !dbg !8 {
-; CHECK-LABEL: @load(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 16, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef), !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA20:![0-9]+]]
+; CHECK-LABEL: define <4 x i32> @load(
+; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) !dbg [[DBG8:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 16, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef), !dbg [[DBG19:![0-9]+]], !tbaa [[CHAR_TBAA20:![0-9]+]]
; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG23:![0-9]+]]
;
entry:
@@ -12,9 +13,10 @@ entry:
}
define void @store(<4 x i32> %a0, ptr nocapture %a1) !dbg !24 {
-; CHECK-LABEL: @store(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 16, <4 x i1> <i1 false, i1 true, i1 false, i1 true>), !dbg [[DBG30:![0-9]+]], !tbaa [[TBAA20]]
+; CHECK-LABEL: define void @store(
+; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr captures(none) [[A1:%.*]]) !dbg [[DBG24:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0]], ptr [[A1]], i32 16, <4 x i1> <i1 false, i1 true, i1 false, i1 true>), !dbg [[DBG30:![0-9]+]], !tbaa [[CHAR_TBAA20]]
; CHECK-NEXT: ret void, !dbg [[DBG31:![0-9]+]]
;
entry:
@@ -23,9 +25,10 @@ entry:
}
define <4 x i32> @gather(<4 x ptr> %a0) !dbg !32 {
-; CHECK-LABEL: @gather(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[A0:%.*]], i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> undef), !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA20]]
+; CHECK-LABEL: define <4 x i32> @gather(
+; CHECK-SAME: <4 x ptr> [[A0:%.*]]) !dbg [[DBG32:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[A0]], i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> undef), !dbg [[DBG35:![0-9]+]], !tbaa [[CHAR_TBAA20]]
; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG36:![0-9]+]]
;
entry:
@@ -34,9 +37,10 @@ entry:
}
define void @scatter(<4 x i32> %a0, <4 x ptr> %a1) !dbg !37 {
-; CHECK-LABEL: @scatter(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[A0:%.*]], <4 x ptr> [[A1:%.*]], i32 16, <4 x i1> <i1 false, i1 true, i1 true, i1 true>), !dbg [[DBG41:![0-9]+]], !tbaa [[TBAA20]]
+; CHECK-LABEL: define void @scatter(
+; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x ptr> [[A1:%.*]]) !dbg [[DBG37:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[A0]], <4 x ptr> [[A1]], i32 16, <4 x i1> <i1 false, i1 true, i1 true, i1 true>), !dbg [[DBG41:![0-9]+]], !tbaa [[CHAR_TBAA20]]
; CHECK-NEXT: ret void, !dbg [[DBG42:![0-9]+]]
;
entry:
@@ -45,9 +49,10 @@ entry:
}
define <4 x i32> @expandload(ptr nocapture readonly %a0) !dbg !43 {
-; CHECK-LABEL: @expandload(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.expandload.v4i32(ptr [[A0:%.*]], <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef), !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA50:![0-9]+]]
+; CHECK-LABEL: define <4 x i32> @expandload(
+; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) !dbg [[DBG43:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.expandload.v4i32(ptr [[A0]], <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef), !dbg [[DBG49:![0-9]+]], !tbaa [[INT_TBAA50:![0-9]+]]
; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG52:![0-9]+]]
;
entry:
@@ -56,9 +61,10 @@ entry:
}
define void @compressstore(<4 x i32> %a0, ptr nocapture %a1) !dbg !53 {
-; CHECK-LABEL: @compressstore(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>), !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA50]]
+; CHECK-LABEL: define void @compressstore(
+; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr captures(none) [[A1:%.*]]) !dbg [[DBG53:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[A0]], ptr [[A1]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>), !dbg [[DBG59:![0-9]+]], !tbaa [[INT_TBAA50]]
; CHECK-NEXT: ret void, !dbg [[DBG60:![0-9]+]]
;
entry:
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 9da6f58..3ea1a01 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -94,16 +94,10 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) {
ret double %conv
}
-; This test does not give the indended result of scvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
; CHECK-LABEL: scvtf_f64i32_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: scvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple:
@@ -315,16 +309,10 @@ define double @ucvtf_f64i32_neg(<4 x i32> %x) {
ret double %conv
}
-; This test does not give the indended result of ucvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
; CHECK-LABEL: ucvtf_f64i32_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ucvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple:
@@ -449,3 +437,24 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
%conv = uitofp <1 x i64> %x to <1 x float>
ret <1 x float> %conv
}
+
+define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) {
+; CHECK-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FPRCVT-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK-NO-FPRCVT: // %bb.0:
+; CHECK-NO-FPRCVT-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NO-FPRCVT-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NO-FPRCVT-NEXT: ret
+ %wide = sext <2 x i32> %x to <2 x i64>
+ %fpv2 = uitofp <2 x i64> %wide to <2 x double>
+ %lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer
+ ret <1 x double> %lane0
+}
+
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
index 5d10d7e..06c53d8 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
@@ -1,46 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
-define i64 @sme_cntsb() {
-; CHECK-LABEL: sme_cntsb:
+define i64 @cntsb() {
+; CHECK-LABEL: cntsb:
; CHECK: // %bb.0:
; CHECK-NEXT: rdsvl x0, #1
; CHECK-NEXT: ret
- %v = call i64 @llvm.aarch64.sme.cntsb()
- ret i64 %v
+ %1 = call i64 @llvm.aarch64.sme.cntsd()
+ %res = shl nuw nsw i64 %1, 3
+ ret i64 %res
}
-define i64 @sme_cntsh() {
-; CHECK-LABEL: sme_cntsh:
+define i64 @cntsh() {
+; CHECK-LABEL: cntsh:
; CHECK: // %bb.0:
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: lsr x0, x8, #1
; CHECK-NEXT: ret
- %v = call i64 @llvm.aarch64.sme.cntsh()
- ret i64 %v
+ %1 = call i64 @llvm.aarch64.sme.cntsd()
+ %res = shl nuw nsw i64 %1, 2
+ ret i64 %res
}
-define i64 @sme_cntsw() {
-; CHECK-LABEL: sme_cntsw:
+define i64 @cntsw() {
+; CHECK-LABEL: cntsw:
; CHECK: // %bb.0:
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: lsr x0, x8, #2
; CHECK-NEXT: ret
- %v = call i64 @llvm.aarch64.sme.cntsw()
- ret i64 %v
+ %1 = call i64 @llvm.aarch64.sme.cntsd()
+ %res = shl nuw nsw i64 %1, 1
+ ret i64 %res
}
-define i64 @sme_cntsd() {
-; CHECK-LABEL: sme_cntsd:
+define i64 @cntsd() {
+; CHECK-LABEL: cntsd:
; CHECK: // %bb.0:
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: lsr x0, x8, #3
; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sme.cntsd()
+ ret i64 %res
+}
+
+define i64 @sme_cntsb_mul() {
+; CHECK-LABEL: sme_cntsb_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x0, #4
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 3
+ %res = mul nuw nsw i64 %shl, 4
+ ret i64 %res
+}
+
+define i64 @sme_cntsh_mul() {
+; CHECK-LABEL: sme_cntsh_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x0, #4
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 2
+ %res = mul nuw nsw i64 %shl, 8
+ ret i64 %res
+}
+
+define i64 @sme_cntsw_mul() {
+; CHECK-LABEL: sme_cntsw_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x0, #4
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 1
+ %res = mul nuw nsw i64 %shl, 16
+ ret i64 %res
+}
+
+define i64 @sme_cntsd_mul() {
+; CHECK-LABEL: sme_cntsd_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x0, #4
+; CHECK-NEXT: ret
%v = call i64 @llvm.aarch64.sme.cntsd()
- ret i64 %v
+ %res = mul nuw nsw i64 %v, 32
+ ret i64 %res
}
-declare i64 @llvm.aarch64.sme.cntsb()
-declare i64 @llvm.aarch64.sme.cntsh()
-declare i64 @llvm.aarch64.sme.cntsw()
declare i64 @llvm.aarch64.sme.cntsd()
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
index e1a474d..2806f86 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
@@ -76,14 +76,14 @@ entry:
%Data1 = alloca <vscale x 16 x i8>, align 16
%Data2 = alloca <vscale x 16 x i8>, align 16
%Data3 = alloca <vscale x 16 x i8>, align 16
- %0 = tail call i64 @llvm.aarch64.sme.cntsb()
+ %0 = tail call i64 @llvm.aarch64.sme.cntsd()
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
ret i8 %vecext
}
-declare i64 @llvm.aarch64.sme.cntsb()
+declare i64 @llvm.aarch64.sme.cntsd()
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index 8c4d57e..505a40c1 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -366,9 +366,10 @@ define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: rdsvl x3, #1
+; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: addvl x0, sp, #2
; CHECK-NEXT: addvl x1, sp, #1
+; CHECK-NEXT: lsr x3, x8, #3
; CHECK-NEXT: mov x2, sp
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl foo
@@ -386,7 +387,7 @@ entry:
%Data1 = alloca <vscale x 16 x i8>, align 16
%Data2 = alloca <vscale x 16 x i8>, align 16
%Data3 = alloca <vscale x 16 x i8>, align 16
- %0 = tail call i64 @llvm.aarch64.sme.cntsb()
+ %0 = tail call i64 @llvm.aarch64.sme.cntsd()
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
@@ -421,7 +422,7 @@ entry:
ret void
}
-declare i64 @llvm.aarch64.sme.cntsb()
+declare i64 @llvm.aarch64.sme.cntsd()
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
new file mode 100644
index 0000000..aa3b7b3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
@@ -0,0 +1,1258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.z() #0
+
+define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1,2,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) {
+; CHECK-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.flat.id()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,2,1" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
new file mode 100644
index 0000000..afe37e3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() #0
+
+define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
new file mode 100644
index 0000000..7ea4fa5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
@@ -0,0 +1,1077 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.z() #0
+
+define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll
index f504f2c..3e96dfe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll
@@ -158,3 +158,69 @@ entry:
store <4 x bfloat> %val, ptr addrspace(1) %use
ret void
}
+
+; This is a special case that does not require aligned VGPRs. Make
+; sure no copies are required for the unaligned ABI return value.
+define { i32, <3 x i32> } @ds_read_b96_tr_b6_no_align2_requirement(ptr addrspace(3) %ptr) {
+; GFX950-SDAG-LABEL: ds_read_b96_tr_b6_no_align2_requirement:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[2:4], v0 offset:32
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v2
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v3
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: ds_read_b96_tr_b6_no_align2_requirement:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: ds_read_b96_tr_b6 v[2:4], v0 offset:32
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v1, v2
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, v3
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+ %val = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3) %gep)
+ %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0
+ %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1
+ ret { i32, <3 x i32> } %insert1
+}
+
+define void @ds_read_b96_tr_b6_no_align2_requirement_agpr(ptr addrspace(3) %ptr) {
+; GFX950-SDAG-LABEL: ds_read_b96_tr_b6_no_align2_requirement_agpr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a1, v0
+; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a2, v1
+; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a3, v2
+; GFX950-SDAG-NEXT: ;;#ASMSTART
+; GFX950-SDAG-NEXT: ; use a1 a2 a3
+; GFX950-SDAG-NEXT: ;;#ASMEND
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: ds_read_b96_tr_b6_no_align2_requirement_agpr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a1, v0
+; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a2, v1
+; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a3, v2
+; GFX950-GISEL-NEXT: ;;#ASMSTART
+; GFX950-GISEL-NEXT: ; use a1 a2 a3
+; GFX950-GISEL-NEXT: ;;#ASMEND
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+ %val = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3) %gep)
+ %val0 = extractelement <3 x i32> %val, i32 0
+ %val1 = extractelement <3 x i32> %val, i32 1
+ %val2 = extractelement <3 x i32> %val, i32 2
+ call void asm sideeffect "; use $0 $1 $2", "{a1},{a2},{a3}"(i32 %val0, i32 %val1, i32 %val2)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
new file mode 100644
index 0000000..49607e3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
+
+define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i32 asm "; def $0", "=a"()
+ %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm "; use $0", "a"(i32 %v)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i32 asm "; def $0", "=a"()
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm "; use $0", "a"(i32 %v)
+ ret void
+}
+
+; FIXME: This should directly use the AGPRs
+define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a1
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %cmp = call i32 asm "; def $0", "=a"()
+ %swap = call i32 asm "; def $0", "=a"()
+ %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm "; use $0", "a"(i32 %v)
+ ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i64 asm "; def $0", "=a"()
+ %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm "; use $0", "a"(i64 %v)
+ ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT: s_endpgm
+ %cmp = call i64 asm "; def $0", "=a"()
+ %swap = call i64 asm "; def $0", "=a"()
+ %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm "; use $0", "a"(i64 %v)
+ ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr_noret:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v1, a0
+; GFX90A-NEXT: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i32 asm "; def $0", "=a"()
+ %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr_noret:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i32 asm "; def $0", "=a"()
+ %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a0
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a1
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %cmp = call i32 asm "; def $0", "=a"()
+ %swap = call i32 asm "; def $0", "=a"()
+ %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: s_endpgm
+ %data = call i64 asm "; def $0", "=a"()
+ %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT: s_endpgm
+ %cmp = call i64 asm "; def $0", "=a"()
+ %swap = call i64 asm "; def $0", "=a"()
+ %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index dcac419..bb4a607f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -418,6 +418,114 @@ main_body:
ret <4 x float> %v
}
+define amdgpu_ps void @load_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: load_1d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: image_load a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_endpgm
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+ ret void
+}
+
+define amdgpu_ps void @load_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: load_2d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: image_load a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_endpgm
+ %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+ ret void
+}
+
+define amdgpu_ps void @load_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: load_3d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_endpgm
+ %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+ ret void
+}
+
+define amdgpu_ps void @load_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: load_cube_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_endpgm
+ %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+ ret void
+}
+
+define amdgpu_ps void @store_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: store_1d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: image_store a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_endpgm
+ %vdata = call <4 x float> asm "; def $0", "=a"()
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @store_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: store_2d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: image_store a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_endpgm
+ %vdata = call <4 x float> asm "; def $0", "=a"()
+ call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @store_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: store_3d_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT: s_endpgm
+ %vdata = call <4 x float> asm "; def $0", "=a"()
+ call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @store_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: store_cube_agpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def a[0:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT: s_endpgm
+ %vdata = call <4 x float> asm "; def $0", "=a"()
+ call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll
index d91b03c..d9f2fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll
@@ -320,3 +320,57 @@ entry:
store <8 x bfloat> %val, ptr addrspace(1) %use
ret void
}
+
+; This is a special case that does not require aligned VGPRs. Make
+; sure no copies are required for the unaligned ABI return value.
+define { i32, <3 x i32> } @global_load_tr6_b96_vaddr_no_align2_requirement(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
+; GFX1250-LABEL: global_load_tr6_b96_vaddr_no_align2_requirement:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v[0:1], off offset:32
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2
+; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
+ %val = call <3 x i32> @llvm.amdgcn.global.load.tr6.b96.v3i32.p1(ptr addrspace(1) %gep)
+ %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0
+ %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1
+ ret { i32, <3 x i32> } %insert1
+}
+
+define { i32, <3 x i32> } @global_load_tr6_b96_saddr_no_align2_requirement(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use) {
+; GFX1250-LABEL: global_load_tr6_b96_saddr_no_align2_requirement:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:32
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2
+; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
+ %val = call <3 x i32> @llvm.amdgcn.global.load.tr6.b96.v3i32.p1(ptr addrspace(1) %gep)
+ %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0
+ %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1
+ ret { i32, <3 x i32> } %insert1
+}
+
+define { i32, <3 x i32> } @ds_load_tr6_b96_no_align2_requirement(ptr addrspace(3) %addr, ptr addrspace(1) %use) {
+; GFX1250-LABEL: ds_load_tr6_b96_no_align2_requirement:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_tr6_b96 v[2:4], v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2
+; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
+ %val = call <3 x i32> @llvm.amdgcn.ds.load.tr6.b96.v3i32.p3(ptr addrspace(3) %gep)
+ %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0
+ %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1
+ ret { i32, <3 x i32> } %insert1
+}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 2554d99..169a84f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -297,6 +297,6 @@ declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
+attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX9ARCH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
new file mode 100644
index 0000000..69439d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, ttmp9
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, ttmp9
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 15
+; GFX1250-SDAG-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_and_b32 s0, s0, 0x1fffe
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, s0, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
new file mode 100644
index 0000000..497241c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs < %s | FileCheck -check-prefix=GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel < %s | FileCheck -check-prefix=GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define amdgpu_cs void @_amdgpu_cs_main() {
+; GFX9-SDAG-LABEL: _amdgpu_cs_main:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: _amdgpu_cs_main:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: _amdgpu_cs_main:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s3, s2
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_lshr_b32 s5, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s5, s4
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s5, s4
+; GFX1250-SDAG-NEXT: s_cselect_b32 s1, ttmp9, s1
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, s3, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: _amdgpu_cs_main:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s3, s1
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s1
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s1, s3, s4
+; GFX1250-GISEL-NEXT: s_bfe_u32 s3, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s4, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s3, s4, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, s3
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, s4, s5
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @workgroup_id_no_clusters() "amdgpu-cluster-dims"="0,0,0" {
+; GFX9-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" {
+; GFX9-SDAG-LABEL: workgroup_id_optimized:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_optimized:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_optimized:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_optimized:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_optimized:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 14
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, s2, 0x3fffc
+; GFX1250-SDAG-NEXT: s_and_b32 s2, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_bfe_u32 s3, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_mul_i32 s2, s2, 3
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_optimized:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, 3
+; GFX1250-GISEL-NEXT: s_lshr_b32 s3, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s2, s1
+; GFX1250-GISEL-NEXT: s_lshl2_add_u32 s2, s3, s4
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @caller() {
+; GFX9-SDAG-LABEL: caller:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b32 s8, s0
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi
+; GFX9-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0
+; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: caller:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b32 s8, s0
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo
+; GFX9-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0
+; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
+; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
+; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
+; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
+; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
+; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: caller:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s1
+; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], callee@abs64
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: caller:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], callee@abs64
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ call amdgpu_gfx void @callee(i32 %idx)
+ ret void
+}
+
+declare amdgpu_gfx void @callee(i32)
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 25609e8..b2bcb74 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -4089,32 +4089,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %
; GFX1250-NEXT: s_add_co_i32 s0, s10, 1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15
+; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX1250-NEXT: s_add_co_i32 s3, s3, 1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3
; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_add_co_i32 s4, s4, s3
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX1250-NEXT: s_cmp_eq_u32 s5, 0
; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2
-; GFX1250-NEXT: v_mad_u32 v0, ttmp9, s2, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0
; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
index 0bd8667..a00aca3 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
@@ -26,7 +26,6 @@ define amdgpu_kernel void @test_waitcnt(ptr addrspace(1) %global_buffer, ptr add
; CHECK-NEXT: ds_write_b32 v1, v3
; CHECK-NEXT: ds_write_b32 v2, v3
; CHECK-NEXT: ; sched_barrier mask(0x00000000)
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_read_b32 v1, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dword v0, v1, s[0:1] offset:16
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index 7a64e55..afca83a 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
;
@@ -15,6 +17,50 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_x:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_x:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s2, ttmp9, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s2
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s4, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_x:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s2, ttmp9, s2
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, s2
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s4, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_x:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
@@ -41,6 +87,74 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
; GFX9-NEXT: global_store_dword v1, v2, s[2:3]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_xy:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT: s_and_b32 s4, ttmp7, 0xffff
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: v_mov_b32_e32 v2, s4
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: s_clause 0x1
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xy:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s4, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s7, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_mul_i32 s5, s4, s6
+; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, s5
+; GFX1250-SDAG-NEXT: s_and_b32 s5, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT: s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s5, s5, s7
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s8, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s5, ttmp9, s5
+; GFX1250-SDAG-NEXT: s_cselect_b32 s4, s4, s6
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s5
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xy:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s6, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s6, s6, 1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s4, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_mul_i32 s5, ttmp9, s6
+; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s5
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s4, ttmp9, s4
+; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s7, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s8, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s5, s7, s5
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-GISEL-NEXT: s_add_co_i32 s8, s8, s5
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s4, s7, s8
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_xy:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
@@ -77,6 +191,99 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_xyz:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_clause 0x1
+; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1200-NEXT: s_and_b32 s6, ttmp7, 0xffff
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: s_lshr_b32 s7, ttmp7, 16
+; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1200-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: s_clause 0x2
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT: global_store_b32 v1, v3, s[4:5]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xyz:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s6, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s0, 1
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, s6, s7
+; GFX1250-SDAG-NEXT: s_bfe_u32 s8, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_and_b32 s10, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s11, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_add_co_i32 s8, s8, s7
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, s10, s9
+; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s11, s11, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s7
+; GFX1250-SDAG-NEXT: s_and_b32 s7, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s11, ttmp9, s11
+; GFX1250-SDAG-NEXT: s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, s11
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s12, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7
+; GFX1250-SDAG-NEXT: s_cselect_b32 s7, s10, s9
+; GFX1250-SDAG-NEXT: s_cselect_b32 s6, s6, s8
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_clause 0x2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[4:5]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xyz:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s7, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s8, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s10, s8, s0
+; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, s10
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s7
+; GFX1250-GISEL-NEXT: s_cselect_b32 s8, s8, s9
+; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s10, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s9, s10, s9
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s9
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s6, s10, s11
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_clause 0x2
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[4:5]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_xyz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_clause 0x1
@@ -107,7 +314,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
+; GFX1250: {{.*}}
; GFX9-GISEL: {{.*}}
; GFX9-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll
index 1bc9b85..d6cb05b 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll
@@ -61,94 +61,94 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
;DXC-NEXT: NumStaticSamplers: 0
;DXC-NEXT: StaticSamplersOffset: 380
;DXC-NEXT: Parameters:
-;DXC-NEXT: - ParameterType: 0
-;DXC-NEXT: ShaderVisibility: 0
+;DXC-NEXT: - ParameterType: DescriptorTable
+;DXC-NEXT: ShaderVisibility: All
;DXC-NEXT: Table:
;DXC-NEXT: NumRanges: 14
;DXC-NEXT: RangesOffset: 44
;DXC-NEXT: Ranges:
-;DXC-NEXT: - RangeType: 3
+;DXC-NEXT: - RangeType: Sampler
;DXC-NEXT: NumDescriptors: 1
;DXC-NEXT: BaseShaderRegister: 0
;DXC-NEXT: RegisterSpace: 1
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
-;DXC-NEXT: - RangeType: 3
+;DXC-NEXT: - RangeType: Sampler
;DXC-NEXT: NumDescriptors: 1
;DXC-NEXT: BaseShaderRegister: 0
;DXC-NEXT: RegisterSpace: 3
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
;DXC-NEXT: DESCRIPTORS_VOLATILE: true
-;DXC-NEXT: - RangeType: 3
+;DXC-NEXT: - RangeType: Sampler
;DXC-NEXT: NumDescriptors: 1
;DXC-NEXT: BaseShaderRegister: 0
;DXC-NEXT: RegisterSpace: 4
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true
-;DXC-NEXT: - RangeType: 0
+;DXC-NEXT: - RangeType: SRV
;DXC-NEXT: NumDescriptors: 1
;DXC-NEXT: BaseShaderRegister: 0
;DXC-NEXT: RegisterSpace: 5
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
;DXC-NEXT: DESCRIPTORS_VOLATILE: true
-;DXC-NEXT: - RangeType: 1
+;DXC-NEXT: - RangeType: UAV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 6
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DESCRIPTORS_VOLATILE: true
-;DXC-NEXT: - RangeType: 2
+;DXC-NEXT: - RangeType: CBuffer
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 7
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DATA_VOLATILE: true
-;DXC-NEXT: - RangeType: 0
+;DXC-NEXT: - RangeType: SRV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 8
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DATA_STATIC: true
-;DXC-NEXT: - RangeType: 1
+;DXC-NEXT: - RangeType: UAV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 9
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true
-;DXC-NEXT: - RangeType: 2
+;DXC-NEXT: - RangeType: CBuffer
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 10
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DESCRIPTORS_VOLATILE: true
;DXC-NEXT: DATA_VOLATILE: true
-;DXC-NEXT: - RangeType: 0
+;DXC-NEXT: - RangeType: SRV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 11
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DESCRIPTORS_VOLATILE: true
;DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true
-;DXC-NEXT: - RangeType: 1
+;DXC-NEXT: - RangeType: UAV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 12
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true
-;DXC-NEXT: - RangeType: 2
+;DXC-NEXT: - RangeType: CBuffer
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 13
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DATA_VOLATILE: true
;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true
-;DXC-NEXT: - RangeType: 0
+;DXC-NEXT: - RangeType: SRV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 14
;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5
;DXC-NEXT: DATA_STATIC: true
;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true
-;DXC-NEXT: - RangeType: 1
+;DXC-NEXT: - RangeType: UAV
;DXC-NEXT: NumDescriptors: 5
;DXC-NEXT: BaseShaderRegister: 1
;DXC-NEXT: RegisterSpace: 15
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll
index fec6c4c..c65eab5 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll
@@ -26,18 +26,18 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: NumStaticSamplers: 0
; DXC-NEXT: StaticSamplersOffset: 84
; DXC-NEXT: Parameters:
-; DXC-NEXT: - ParameterType: 0
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: - ParameterType: DescriptorTable
+; DXC-NEXT: ShaderVisibility: All
; DXC-NEXT: Table:
; DXC-NEXT: NumRanges: 2
; DXC-NEXT: RangesOffset: 44
; DXC-NEXT: Ranges:
-; DXC-NEXT: - RangeType: 3
+; DXC-NEXT: - RangeType: Sampler
; DXC-NEXT: NumDescriptors: 1
; DXC-NEXT: BaseShaderRegister: 1
; DXC-NEXT: RegisterSpace: 0
; DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
-; DXC-NEXT: - RangeType: 1
+; DXC-NEXT: - RangeType: UAV
; DXC-NEXT: NumDescriptors: 5
; DXC-NEXT: BaseShaderRegister: 1
; DXC-NEXT: RegisterSpace: 10
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll
index 4f6f0d0..c398550 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll
@@ -23,24 +23,24 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: Size: 92
; DXC-NEXT: RootSignature:
; DXC-NEXT: Version: 2
-; DXC-NEXT: NumRootParameters: 1
-; DXC-NEXT: RootParametersOffset: 24
+; DXC-NEXT: NumRootParameters: 1
+; DXC-NEXT: RootParametersOffset: 24
; DXC-NEXT: NumStaticSamplers: 0
; DXC-NEXT: StaticSamplersOffset: 92
; DXC-NEXT: Parameters:
-; DXC-NEXT: - ParameterType: 0
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: - ParameterType: DescriptorTable
+; DXC-NEXT: ShaderVisibility: All
; DXC-NEXT: Table:
; DXC-NEXT: NumRanges: 2
; DXC-NEXT: RangesOffset: 44
; DXC-NEXT: Ranges:
-; DXC-NEXT: - RangeType: 0
+; DXC-NEXT: - RangeType: SRV
; DXC-NEXT: NumDescriptors: 1
; DXC-NEXT: BaseShaderRegister: 1
; DXC-NEXT: RegisterSpace: 0
; DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295
; DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true
-; DXC-NEXT: - RangeType: 1
+; DXC-NEXT: - RangeType: UAV
; DXC-NEXT: NumDescriptors: 5
; DXC-NEXT: BaseShaderRegister: 1
; DXC-NEXT: RegisterSpace: 10
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll
index d217f39..4dec4e5 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll
@@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: Size: 48
; DXC-NEXT: RootSignature:
; DXC-NEXT: Version: 2
-; DXC-NEXT: NumRootParameters: 1
-; DXC-NEXT: RootParametersOffset: 24
+; DXC-NEXT: NumRootParameters: 1
+; DXC-NEXT: RootParametersOffset: 24
; DXC-NEXT: NumStaticSamplers: 0
; DXC-NEXT: StaticSamplersOffset: 48
; DXC-NEXT: Parameters:
-; DXC-NEXT: - ParameterType: 1
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: - ParameterType: Constants32Bit
+; DXC-NEXT: ShaderVisibility: All
; DXC-NEXT: Constants:
; DXC-NEXT: Num32BitValues: 3
; DXC-NEXT: RegisterSpace: 2
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll
index 54292bb..6f3acda 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll
@@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: Size: 48
; DXC-NEXT: RootSignature:
; DXC-NEXT: Version: 2
-; DXC-NEXT: NumRootParameters: 1
-; DXC-NEXT: RootParametersOffset: 24
+; DXC-NEXT: NumRootParameters: 1
+; DXC-NEXT: RootParametersOffset: 24
; DXC-NEXT: NumStaticSamplers: 0
; DXC-NEXT: StaticSamplersOffset: 48
; DXC-NEXT: Parameters:
-; DXC-NEXT: - ParameterType: 2
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: - ParameterType: CBV
+; DXC-NEXT: ShaderVisibility: All
; DXC-NEXT: Descriptor:
; DXC-NEXT: RegisterSpace: 2
; DXC-NEXT: ShaderRegister: 1
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll
index 891a03b..3509360e 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll
@@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: Size: 44
; DXC-NEXT: RootSignature:
; DXC-NEXT: Version: 1
-; DXC-NEXT: NumRootParameters: 1
-; DXC-NEXT: RootParametersOffset: 24
+; DXC-NEXT: NumRootParameters: 1
+; DXC-NEXT: RootParametersOffset: 24
; DXC-NEXT: NumStaticSamplers: 0
; DXC-NEXT: StaticSamplersOffset: 44
; DXC-NEXT: Parameters:
-; DXC-NEXT: - ParameterType: 2
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: - ParameterType: CBV
+; DXC-NEXT: ShaderVisibility: All
; DXC-NEXT: Descriptor:
; DXC-NEXT: RegisterSpace: 2
; DXC-NEXT: ShaderRegister: 1
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll
index d9ee39d..1dd470d 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll
@@ -27,16 +27,16 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
; DXC-NEXT: StaticSamplersOffset: 24
; DXC-NEXT: Parameters: []
; DXC-NEXT: Samplers:
-; DXC-NEXT: - Filter: 4
-; DXC-NEXT: AddressU: 2
-; DXC-NEXT: AddressV: 3
-; DXC-NEXT: AddressW: 5
+; DXC-NEXT: - Filter: MinPointMagLinearMipPoint
+; DXC-NEXT: AddressU: Mirror
+; DXC-NEXT: AddressV: Clamp
+; DXC-NEXT: AddressW: MirrorOnce
; DXC-NEXT: MipLODBias: 1.425
; DXC-NEXT: MaxAnisotropy: 9
-; DXC-NEXT: ComparisonFunc: 3
-; DXC-NEXT: BorderColor: 2
+; DXC-NEXT: ComparisonFunc: Equal
+; DXC-NEXT: BorderColor: OpaqueWhite
; DXC-NEXT: MinLOD: -128
; DXC-NEXT: MaxLOD: 128
; DXC-NEXT: ShaderRegister: 42
; DXC-NEXT: RegisterSpace: 0
-; DXC-NEXT: ShaderVisibility: 0
+; DXC-NEXT: ShaderVisibility: All
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c7f70a9..ea08061 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -107,6 +107,9 @@
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Trace Metrics
+; CHECK-NEXT: Lazy Machine Block Frequency Analysis
+; CHECK-NEXT: Machine InstCombiner
; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
; CHECK-NEXT: MachineDominator Tree Construction
@@ -117,9 +120,6 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
-; CHECK-NEXT: Machine Trace Metrics
-; CHECK-NEXT: Lazy Machine Block Frequency Analysis
-; CHECK-NEXT: Machine InstCombiner
; RV64-NEXT: RISC-V Optimize W Instructions
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Merge Base Offset
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index eacd5c9..154fb83 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -143,6 +143,7 @@
; RUN: llc -mtriple=riscv32 -mattr=+supm %s -o - | FileCheck --check-prefix=RV32SUPM %s
; RUN: llc -mtriple=riscv32 -mattr=+smctr %s -o - | FileCheck --check-prefix=RV32SMCTR %s
; RUN: llc -mtriple=riscv32 -mattr=+ssctr %s -o - | FileCheck --check-prefix=RV32SSCTR %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zibi %s -o - | FileCheck --check-prefix=RV32ZIBI %s
; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s
@@ -292,6 +293,7 @@
; RUN: llc -mtriple=riscv64 -mattr=+sdext %s -o - | FileCheck --check-prefix=RV64SDEXT %s
; RUN: llc -mtriple=riscv64 -mattr=+sdtrig %s -o - | FileCheck --check-prefix=RV64SDTRIG %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-p %s -o - | FileCheck --check-prefix=RV64P %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zibi %s -o - | FileCheck --check-prefix=RV64ZIBI %s
; Tests for profile features.
@@ -452,6 +454,7 @@
; RV32SMCTR: .attribute 5, "rv32i2p1_smctr1p0_sscsrind1p0"
; RV32SSCTR: .attribute 5, "rv32i2p1_sscsrind1p0_ssctr1p0"
; RV32P: .attribute 5, "rv32i2p1_p0p15"
+; RV32ZIBI: .attribute 5, "rv32i2p1_zibi0p1"
; RV64M: .attribute 5, "rv64i2p1_m2p0_zmmul1p0"
; RV64ZMMUL: .attribute 5, "rv64i2p1_zmmul1p0"
@@ -599,6 +602,7 @@
; RV64SDEXT: .attribute 5, "rv64i2p1_sdext1p0"
; RV64SDTRIG: .attribute 5, "rv64i2p1_sdtrig1p0"
; RV64P: .attribute 5, "rv64i2p1_p0p15"
+; RV64ZIBI: .attribute 5, "rv64i2p1_zibi0p1"
; RVI20U32: .attribute 5, "rv32i2p1"
; RVI20U64: .attribute 5, "rv64i2p1"
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index 4fb3dff..9d95f1f 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs < %s | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs,+xventanacondops < %s | FileCheck %s -check-prefix=RV32XVENTANACONDOPS
; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+xventanacondops < %s | FileCheck %s -check-prefix=RV64XVENTANACONDOPS
-; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV
+; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+xtheadbs,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV
; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
@@ -126,7 +126,7 @@ define i64 @zero_singlebit1(i64 %rs1, i64 %rs2) {
;
; RV64XTHEADCONDMOV-LABEL: zero_singlebit1:
; RV64XTHEADCONDMOV: # %bb.0:
-; RV64XTHEADCONDMOV-NEXT: bexti a1, a1, 12
+; RV64XTHEADCONDMOV-NEXT: th.tst a1, a1, 12
; RV64XTHEADCONDMOV-NEXT: th.mvnez a0, zero, a1
; RV64XTHEADCONDMOV-NEXT: ret
;
@@ -179,9 +179,8 @@ define i64 @zero_singlebit2(i64 %rs1, i64 %rs2) {
;
; RV64XTHEADCONDMOV-LABEL: zero_singlebit2:
; RV64XTHEADCONDMOV: # %bb.0:
-; RV64XTHEADCONDMOV-NEXT: slli a1, a1, 51
-; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
-; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: th.tst a1, a1, 12
+; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1
; RV64XTHEADCONDMOV-NEXT: ret
;
; RV32ZICOND-LABEL: zero_singlebit2:
@@ -4297,9 +4296,8 @@ define i64 @single_bit(i64 %x) {
;
; RV64XTHEADCONDMOV-LABEL: single_bit:
; RV64XTHEADCONDMOV: # %bb.0: # %entry
-; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 53
-; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
-; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: andi a1, a0, 1024
+; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1
; RV64XTHEADCONDMOV-NEXT: ret
;
; RV32ZICOND-LABEL: single_bit:
@@ -4353,9 +4351,8 @@ define i64 @single_bit2(i64 %x) {
;
; RV64XTHEADCONDMOV-LABEL: single_bit2:
; RV64XTHEADCONDMOV: # %bb.0: # %entry
-; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 52
-; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
-; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: th.tst a1, a0, 11
+; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1
; RV64XTHEADCONDMOV-NEXT: ret
;
; RV32ZICOND-LABEL: single_bit2:
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 01b8c0e..a3b56c6 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -52,6 +52,7 @@
; CHECK-NEXT: experimental-xsfmclic - 'XSfmclic' (SiFive CLIC Machine-mode CSRs).
; CHECK-NEXT: experimental-xsfsclic - 'XSfsclic' (SiFive CLIC Supervisor-mode CSRs).
; CHECK-NEXT: experimental-zalasr - 'Zalasr' (Load-Acquire and Store-Release Instructions).
+; CHECK-NEXT: experimental-zibi - 'Zibi' (Branch with Immediate).
; CHECK-NEXT: experimental-zicfilp - 'Zicfilp' (Landing pad).
; CHECK-NEXT: experimental-zicfiss - 'Zicfiss' (Shadow stack).
; CHECK-NEXT: experimental-zvbc32e - 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements).
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 7a1c41c..69eca6d 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1094,33 +1094,19 @@ declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
-; CHECK_LOCAL-LABEL: test_fmadd_strategy:
-; CHECK_LOCAL: # %bb.0: # %entry
-; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT: andi a0, a0, 1
-; CHECK_LOCAL-NEXT: fmv.d fa5, fa0
-; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2
-; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2
-; CHECK_LOCAL-NEXT: # %bb.1: # %entry
-; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1
-; CHECK_LOCAL-NEXT: fmadd.d fa5, fa5, fa1, fa0
-; CHECK_LOCAL-NEXT: fsub.d fa0, fa5, fa4
-; CHECK_LOCAL-NEXT: .LBB76_2: # %entry
-; CHECK_LOCAL-NEXT: ret
-;
-; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
-; CHECK_GLOBAL: # %bb.0: # %entry
-; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT: andi a0, a0, 1
-; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0
-; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2
-; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2
-; CHECK_GLOBAL-NEXT: # %bb.1: # %entry
-; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, fa1
-; CHECK_GLOBAL-NEXT: fadd.d fa4, fa5, fa0
-; CHECK_GLOBAL-NEXT: fsub.d fa0, fa4, fa5
-; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry
-; CHECK_GLOBAL-NEXT: ret
+; CHECK-LABEL: test_fmadd_strategy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsub.d fa5, fa0, fa1
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: beqz a0, .LBB76_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmadd.d fa5, fa5, fa2, fa4
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB76_2:
+; CHECK-NEXT: fmul.d fa0, fa5, fa2
+; CHECK-NEXT: ret
entry:
%sub = fsub contract double %a0, %a1
%mul = fmul contract double %sub, %a2
@@ -1132,3 +1118,6 @@ entry:
%retval.0 = select i1 %tobool.not, double %mul, double %sub3
ret double %retval.0
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK_GLOBAL: {{.*}}
+; CHECK_LOCAL: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index da81fe5..f9ccf76 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -208,14 +208,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: snez a3, a0
-; RV32I-NEXT: neg a4, a1
-; RV32I-NEXT: sub a3, a4, a3
-; RV32I-NEXT: neg a4, a0
+; RV32I-NEXT: snez a4, a0
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: add a4, a1, a4
+; RV32I-NEXT: neg a4, a4
; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: sw a1, 4(a2)
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: neg_abs64_multiuse:
@@ -227,14 +227,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: .LBB5_2:
-; RV32ZBB-NEXT: snez a3, a0
-; RV32ZBB-NEXT: neg a4, a1
-; RV32ZBB-NEXT: sub a3, a4, a3
-; RV32ZBB-NEXT: neg a4, a0
+; RV32ZBB-NEXT: snez a4, a0
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: add a4, a1, a4
+; RV32ZBB-NEXT: neg a4, a4
; RV32ZBB-NEXT: sw a0, 0(a2)
; RV32ZBB-NEXT: sw a1, 4(a2)
-; RV32ZBB-NEXT: mv a0, a4
-; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: mv a0, a3
+; RV32ZBB-NEXT: mv a1, a4
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: neg_abs64_multiuse:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 83b435d..056f552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -934,7 +934,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: add a1, a1, a5
; CHECK-NEXT: slli a3, a3, 32
; CHECK-NEXT: srli a3, a3, 32
-; CHECK-NEXT: add a0, a4, a0
+; CHECK-NEXT: add a0, a0, a4
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: .LBB14_6: # %bb35
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index dddcd4f..ead79fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
; RV32-NEXT: blez a6, .LBB0_17
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: addi t0, a7, -1
+; RV32-NEXT: addi t3, a7, -1
; RV32-NEXT: csrr t2, vlenb
-; RV32-NEXT: mul t3, a1, t0
-; RV32-NEXT: mul t4, a3, t0
-; RV32-NEXT: mul t5, a5, t0
; RV32-NEXT: slli t1, t2, 1
-; RV32-NEXT: li t6, 32
+; RV32-NEXT: li t4, 32
; RV32-NEXT: mv t0, t1
; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: .cfi_offset s1, -8
; RV32-NEXT: .cfi_offset s2, -12
+; RV32-NEXT: .cfi_offset s3, -16
; RV32-NEXT: .cfi_remember_state
-; RV32-NEXT: add t3, a0, t3
-; RV32-NEXT: add t4, a2, t4
-; RV32-NEXT: add s0, a4, t5
-; RV32-NEXT: bltu t6, t1, .LBB0_6
+; RV32-NEXT: mul t5, a1, t3
+; RV32-NEXT: add s0, a0, a6
+; RV32-NEXT: mul t6, a3, t3
+; RV32-NEXT: add s2, a2, a6
+; RV32-NEXT: mul s1, a5, t3
+; RV32-NEXT: add s3, a4, a6
+; RV32-NEXT: bltu t4, t1, .LBB0_6
; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t1, 32
; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: add t3, t3, a6
-; RV32-NEXT: add t5, t4, a6
-; RV32-NEXT: add t4, s0, a6
+; RV32-NEXT: add t3, s0, t5
+; RV32-NEXT: add t6, s2, t6
+; RV32-NEXT: add t4, s3, s1
; RV32-NEXT: j .LBB0_8
; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
; RV32-NEXT: mv t1, t0
; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
; RV32-NEXT: .cfi_restore_state
; RV32-NEXT: li t0, 0
-; RV32-NEXT: sltu t5, a0, t5
+; RV32-NEXT: sltu t5, a0, t6
; RV32-NEXT: sltu t6, a2, t3
; RV32-NEXT: and t5, t5, t6
; RV32-NEXT: sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: .cfi_restore s1
; RV32-NEXT: .cfi_restore s2
+; RV32-NEXT: .cfi_restore s3
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: or t6, s0, s1
; RV64P670-NEXT: sltu s1, a0, t5
; RV64P670-NEXT: sltu s0, a4, t4
-; RV64P670-NEXT: mv t5, a0
+; RV64P670-NEXT: add t4, a0, a6
; RV64P670-NEXT: and s0, s0, s1
; RV64P670-NEXT: or s1, a1, a5
; RV64P670-NEXT: srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: or s0, t6, s0
; RV64P670-NEXT: sltu s1, a6, s1
; RV64P670-NEXT: or s0, s0, s1
-; RV64P670-NEXT: andi t4, s0, 1
+; RV64P670-NEXT: andi t5, s0, 1
; RV64P670-NEXT: j .LBB0_4
; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
-; RV64P670-NEXT: add t5, t5, a1
+; RV64P670-NEXT: add a0, a0, a1
; RV64P670-NEXT: add a2, a2, a3
; RV64P670-NEXT: add a4, a4, a5
; RV64P670-NEXT: addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: # =>This Loop Header: Depth=1
; RV64P670-NEXT: # Child Loop BB0_7 Depth 2
; RV64P670-NEXT: # Child Loop BB0_10 Depth 2
-; RV64P670-NEXT: beqz t4, .LBB0_6
+; RV64P670-NEXT: beqz t5, .LBB0_6
; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: li t6, 0
; RV64P670-NEXT: j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: slli s1, t2, 28
; RV64P670-NEXT: mv s2, a2
; RV64P670-NEXT: mv s3, a4
-; RV64P670-NEXT: mv s4, t5
+; RV64P670-NEXT: mv s4, a0
; RV64P670-NEXT: sub s1, s1, t3
; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64P670-NEXT: and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: mul s2, a1, t0
-; RV64P670-NEXT: add s0, a0, a6
-; RV64P670-NEXT: add s1, t5, t6
+; RV64P670-NEXT: add s1, a0, t6
; RV64P670-NEXT: add s4, a4, t6
; RV64P670-NEXT: add t6, t6, a2
-; RV64P670-NEXT: add s2, s2, s0
+; RV64P670-NEXT: add s2, s2, t4
; RV64P670-NEXT: .LBB0_10: # %for.body4.us
; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: or s0, t4, s0
; RV64X60-NEXT: sltu s1, a6, s1
; RV64X60-NEXT: or s0, s0, s1
-; RV64X60-NEXT: andi t4, s0, 1
-; RV64X60-NEXT: mv t5, a0
+; RV64X60-NEXT: add t4, a0, a6
+; RV64X60-NEXT: andi t5, s0, 1
; RV64X60-NEXT: j .LBB0_4
; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
-; RV64X60-NEXT: add t5, t5, a1
+; RV64X60-NEXT: add a0, a0, a1
; RV64X60-NEXT: add a2, a2, a3
; RV64X60-NEXT: addiw t1, t1, 1
; RV64X60-NEXT: add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: # =>This Loop Header: Depth=1
; RV64X60-NEXT: # Child Loop BB0_7 Depth 2
; RV64X60-NEXT: # Child Loop BB0_10 Depth 2
-; RV64X60-NEXT: beqz t4, .LBB0_6
+; RV64X60-NEXT: beqz t5, .LBB0_6
; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
; RV64X60-NEXT: li t6, 0
; RV64X60-NEXT: j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: and t6, s1, a6
; RV64X60-NEXT: mv s2, a2
; RV64X60-NEXT: mv s3, a4
-; RV64X60-NEXT: mv s4, t5
+; RV64X60-NEXT: mv s4, a0
; RV64X60-NEXT: mv s1, t6
; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64X60-NEXT: .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64X60-NEXT: mul s2, a1, t0
-; RV64X60-NEXT: add s1, a0, a6
-; RV64X60-NEXT: add s0, t5, t6
-; RV64X60-NEXT: add s2, s2, s1
+; RV64X60-NEXT: add s0, a0, t6
+; RV64X60-NEXT: add s2, s2, t4
; RV64X60-NEXT: add s4, a4, t6
; RV64X60-NEXT: add t6, t6, a2
; RV64X60-NEXT: .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: or s0, a1, a5
; RV64-NEXT: srli s0, s0, 63
; RV64-NEXT: or t5, t5, s0
+; RV64-NEXT: sltu s0, a6, t4
; RV64-NEXT: or t5, t6, t5
-; RV64-NEXT: sltu t4, a6, t4
-; RV64-NEXT: or t4, t4, t5
-; RV64-NEXT: andi t4, t4, 1
-; RV64-NEXT: mv t5, a0
+; RV64-NEXT: add t4, a0, a6
+; RV64-NEXT: or t5, s0, t5
+; RV64-NEXT: andi t5, t5, 1
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: j .LBB0_6
; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT: add t5, t5, a1
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: # =>This Loop Header: Depth=1
; RV64-NEXT: # Child Loop BB0_9 Depth 2
; RV64-NEXT: # Child Loop BB0_12 Depth 2
-; RV64-NEXT: beqz t4, .LBB0_8
+; RV64-NEXT: beqz t5, .LBB0_8
; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
; RV64-NEXT: li t6, 0
; RV64-NEXT: j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: and t6, t6, a6
; RV64-NEXT: mv s0, a2
; RV64-NEXT: mv s1, a4
-; RV64-NEXT: mv s2, t5
+; RV64-NEXT: mv s2, a0
; RV64-NEXT: mv s3, t6
; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma
; RV64-NEXT: .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: beq t6, a6, .LBB0_5
; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT: mul s1, a1, t2
-; RV64-NEXT: add s2, a0, a6
-; RV64-NEXT: add s0, t5, t6
-; RV64-NEXT: add s1, s2, s1
-; RV64-NEXT: add s2, a4, t6
+; RV64-NEXT: mul s2, a1, t2
+; RV64-NEXT: add s0, a0, t6
+; RV64-NEXT: add s1, a4, t6
+; RV64-NEXT: add s2, t4, s2
; RV64-NEXT: add t6, a2, t6
; RV64-NEXT: .LBB0_12: # %for.body4.us
; RV64-NEXT: # Parent Loop BB0_6 Depth=1
; RV64-NEXT: # => This Inner Loop Header: Depth=2
; RV64-NEXT: lbu s3, 0(t6)
-; RV64-NEXT: lbu s4, 0(s2)
+; RV64-NEXT: lbu s4, 0(s1)
; RV64-NEXT: add s3, s3, s4
; RV64-NEXT: addi s3, s3, 1
; RV64-NEXT: srli s3, s3, 1
; RV64-NEXT: sb s3, 0(s0)
; RV64-NEXT: addi s0, s0, 1
-; RV64-NEXT: addi s2, s2, 1
+; RV64-NEXT: addi s1, s1, 1
; RV64-NEXT: addi t6, t6, 1
-; RV64-NEXT: bne s0, s1, .LBB0_12
+; RV64-NEXT: bne s0, s2, .LBB0_12
; RV64-NEXT: j .LBB0_5
; RV64-NEXT: .LBB0_13:
; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/select-zbb.ll b/llvm/test/CodeGen/RISCV/select-zbb.ll
index 0af699a..efc3f46 100644
--- a/llvm/test/CodeGen/RISCV/select-zbb.ll
+++ b/llvm/test/CodeGen/RISCV/select-zbb.ll
@@ -12,96 +12,80 @@
define i32 @select_umin_1(i1 zeroext %cond, i32 %a, i32 %b) {
; RV32IM-LABEL: select_umin_1:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: bgeu a1, a2, .LBB0_3
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: or a1, a0, a1
+; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: bltu a2, a1, .LBB0_2
; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB0_4
-; RV32IM-NEXT: .LBB0_2: # %entry
; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB0_3: # %entry
-; RV32IM-NEXT: mv a1, a2
-; RV32IM-NEXT: bnez a0, .LBB0_2
-; RV32IM-NEXT: .LBB0_4: # %entry
-; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: .LBB0_2: # %entry
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umin_1:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: sext.w a3, a2
+; RV64IM-NEXT: mv a3, a0
+; RV64IM-NEXT: sext.w a0, a2
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: or a1, a3, a1
; RV64IM-NEXT: sext.w a1, a1
-; RV64IM-NEXT: bgeu a1, a3, .LBB0_3
+; RV64IM-NEXT: bltu a0, a1, .LBB0_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB0_4
-; RV64IM-NEXT: .LBB0_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB0_3: # %entry
-; RV64IM-NEXT: mv a1, a3
-; RV64IM-NEXT: bnez a0, .LBB0_2
-; RV64IM-NEXT: .LBB0_4: # %entry
-; RV64IM-NEXT: mv a0, a2
+; RV64IM-NEXT: .LBB0_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umin_1:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: beqz a0, .LBB0_2
-; RV32IMZBB-NEXT: # %bb.1:
-; RV32IMZBB-NEXT: minu a2, a1, a2
-; RV32IMZBB-NEXT: .LBB0_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a2
+; RV32IMZBB-NEXT: addi a0, a0, -1
+; RV32IMZBB-NEXT: or a0, a0, a1
+; RV32IMZBB-NEXT: minu a0, a2, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umin_1:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: beqz a0, .LBB0_2
-; RV64IMZBB-NEXT: # %bb.1:
; RV64IMZBB-NEXT: sext.w a2, a2
-; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: minu a2, a1, a2
-; RV64IMZBB-NEXT: .LBB0_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a2
+; RV64IMZBB-NEXT: addi a0, a0, -1
+; RV64IMZBB-NEXT: or a0, a0, a1
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: minu a0, a2, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umin_1:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltu a3, a1, a2
-; RV32IMZICOND-NEXT: czero.nez a4, a2, a3
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3
-; RV32IMZICOND-NEXT: or a1, a1, a4
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV32IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT: addi a0, a0, -1
+; RV32IMZICOND-NEXT: or a0, a0, a1
+; RV32IMZICOND-NEXT: sltu a1, a2, a0
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a1
+; RV32IMZICOND-NEXT: czero.eqz a1, a2, a1
; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umin_1:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a3, a2
-; RV64IMZICOND-NEXT: sext.w a1, a1
-; RV64IMZICOND-NEXT: sltu a4, a1, a3
-; RV64IMZICOND-NEXT: czero.nez a3, a3, a4
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4
-; RV64IMZICOND-NEXT: or a1, a1, a3
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT: sext.w a2, a2
+; RV64IMZICOND-NEXT: addi a0, a0, -1
+; RV64IMZICOND-NEXT: or a0, a0, a1
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltu a1, a2, a0
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a1
+; RV64IMZICOND-NEXT: czero.eqz a1, a2, a1
; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umin_1:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: minu a1, a1, a2
-; RV32IMBOTH-NEXT: czero.nez a2, a2, a0
-; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0
-; RV32IMBOTH-NEXT: or a0, a0, a2
+; RV32IMBOTH-NEXT: addi a0, a0, -1
+; RV32IMBOTH-NEXT: or a0, a0, a1
+; RV32IMBOTH-NEXT: minu a0, a2, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umin_1:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a3, a2
-; RV64IMBOTH-NEXT: sext.w a1, a1
-; RV64IMBOTH-NEXT: minu a1, a1, a3
-; RV64IMBOTH-NEXT: czero.nez a2, a2, a0
-; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0
-; RV64IMBOTH-NEXT: or a0, a0, a2
+; RV64IMBOTH-NEXT: sext.w a2, a2
+; RV64IMBOTH-NEXT: addi a0, a0, -1
+; RV64IMBOTH-NEXT: or a0, a0, a1
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: minu a0, a2, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umin(i32 %a, i32 %b)
@@ -112,97 +96,80 @@ entry:
define i32 @select_umin_2(i1 zeroext %cond, i32 %a, i32 %b) {
; RV32IM-LABEL: select_umin_2:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: mv a3, a1
-; RV32IM-NEXT: bgeu a1, a2, .LBB1_3
+; RV32IM-NEXT: neg a0, a0
+; RV32IM-NEXT: or a2, a0, a2
+; RV32IM-NEXT: mv a0, a1
+; RV32IM-NEXT: bltu a1, a2, .LBB1_2
; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB1_4
+; RV32IM-NEXT: mv a0, a2
; RV32IM-NEXT: .LBB1_2: # %entry
-; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB1_3: # %entry
-; RV32IM-NEXT: mv a3, a2
-; RV32IM-NEXT: bnez a0, .LBB1_2
-; RV32IM-NEXT: .LBB1_4: # %entry
-; RV32IM-NEXT: mv a0, a3
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umin_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: sext.w a3, a2
-; RV64IM-NEXT: sext.w a2, a1
-; RV64IM-NEXT: bgeu a2, a3, .LBB1_3
+; RV64IM-NEXT: mv a3, a0
+; RV64IM-NEXT: sext.w a0, a1
+; RV64IM-NEXT: neg a1, a3
+; RV64IM-NEXT: or a1, a1, a2
+; RV64IM-NEXT: sext.w a1, a1
+; RV64IM-NEXT: bltu a0, a1, .LBB1_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB1_4
-; RV64IM-NEXT: .LBB1_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB1_3: # %entry
-; RV64IM-NEXT: mv a2, a3
-; RV64IM-NEXT: bnez a0, .LBB1_2
-; RV64IM-NEXT: .LBB1_4: # %entry
-; RV64IM-NEXT: mv a0, a2
+; RV64IM-NEXT: .LBB1_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umin_2:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: bnez a0, .LBB1_2
-; RV32IMZBB-NEXT: # %bb.1: # %entry
-; RV32IMZBB-NEXT: minu a1, a1, a2
-; RV32IMZBB-NEXT: .LBB1_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a1
+; RV32IMZBB-NEXT: neg a0, a0
+; RV32IMZBB-NEXT: or a0, a0, a2
+; RV32IMZBB-NEXT: minu a0, a1, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umin_2:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: bnez a0, .LBB1_2
-; RV64IMZBB-NEXT: # %bb.1: # %entry
-; RV64IMZBB-NEXT: sext.w a2, a2
; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: minu a1, a1, a2
-; RV64IMZBB-NEXT: .LBB1_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a1
+; RV64IMZBB-NEXT: neg a0, a0
+; RV64IMZBB-NEXT: or a0, a0, a2
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: minu a0, a1, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umin_2:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltu a3, a1, a2
-; RV32IMZICOND-NEXT: czero.nez a2, a2, a3
-; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3
-; RV32IMZICOND-NEXT: or a2, a3, a2
-; RV32IMZICOND-NEXT: czero.nez a2, a2, a0
-; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT: neg a0, a0
; RV32IMZICOND-NEXT: or a0, a0, a2
+; RV32IMZICOND-NEXT: sltu a2, a1, a0
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2
+; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umin_2:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a2, a2
-; RV64IMZICOND-NEXT: sext.w a3, a1
-; RV64IMZICOND-NEXT: sltu a4, a3, a2
-; RV64IMZICOND-NEXT: czero.nez a2, a2, a4
-; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4
-; RV64IMZICOND-NEXT: or a2, a3, a2
-; RV64IMZICOND-NEXT: czero.nez a2, a2, a0
-; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT: sext.w a1, a1
+; RV64IMZICOND-NEXT: neg a0, a0
; RV64IMZICOND-NEXT: or a0, a0, a2
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltu a2, a1, a0
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2
+; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umin_2:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: minu a2, a1, a2
-; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV32IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV32IMBOTH-NEXT: or a0, a1, a0
+; RV32IMBOTH-NEXT: neg a0, a0
+; RV32IMBOTH-NEXT: or a0, a0, a2
+; RV32IMBOTH-NEXT: minu a0, a1, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umin_2:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a2, a2
-; RV64IMBOTH-NEXT: sext.w a3, a1
-; RV64IMBOTH-NEXT: minu a2, a3, a2
-; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV64IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV64IMBOTH-NEXT: or a0, a1, a0
+; RV64IMBOTH-NEXT: sext.w a1, a1
+; RV64IMBOTH-NEXT: neg a0, a0
+; RV64IMBOTH-NEXT: or a0, a0, a2
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: minu a0, a1, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umin(i32 %a, i32 %b)
@@ -213,99 +180,76 @@ entry:
define i32 @select_umin_3(i1 zeroext %cond, i32 %a) {
; RV32IM-LABEL: select_umin_3:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: li a3, 32
-; RV32IM-NEXT: mv a2, a1
-; RV32IM-NEXT: bgeu a1, a3, .LBB2_3
-; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB2_4
-; RV32IM-NEXT: .LBB2_2: # %entry
+; RV32IM-NEXT: neg a0, a0
+; RV32IM-NEXT: ori a2, a0, 32
; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB2_3: # %entry
-; RV32IM-NEXT: li a2, 32
-; RV32IM-NEXT: bnez a0, .LBB2_2
-; RV32IM-NEXT: .LBB2_4: # %entry
+; RV32IM-NEXT: bltu a1, a2, .LBB2_2
+; RV32IM-NEXT: # %bb.1: # %entry
; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: .LBB2_2: # %entry
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umin_3:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: sext.w a2, a1
-; RV64IM-NEXT: li a3, 32
-; RV64IM-NEXT: bgeu a2, a3, .LBB2_3
+; RV64IM-NEXT: mv a2, a0
+; RV64IM-NEXT: sext.w a0, a1
+; RV64IM-NEXT: neg a1, a2
+; RV64IM-NEXT: ori a1, a1, 32
+; RV64IM-NEXT: bltu a0, a1, .LBB2_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB2_4
-; RV64IM-NEXT: .LBB2_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB2_3: # %entry
-; RV64IM-NEXT: li a2, 32
-; RV64IM-NEXT: bnez a0, .LBB2_2
-; RV64IM-NEXT: .LBB2_4: # %entry
-; RV64IM-NEXT: mv a0, a2
+; RV64IM-NEXT: .LBB2_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umin_3:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: bnez a0, .LBB2_2
-; RV32IMZBB-NEXT: # %bb.1: # %entry
-; RV32IMZBB-NEXT: li a0, 32
-; RV32IMZBB-NEXT: minu a1, a1, a0
-; RV32IMZBB-NEXT: .LBB2_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a1
+; RV32IMZBB-NEXT: neg a0, a0
+; RV32IMZBB-NEXT: ori a0, a0, 32
+; RV32IMZBB-NEXT: minu a0, a1, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umin_3:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: bnez a0, .LBB2_2
-; RV64IMZBB-NEXT: # %bb.1: # %entry
; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: li a0, 32
-; RV64IMZBB-NEXT: minu a1, a1, a0
-; RV64IMZBB-NEXT: .LBB2_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a1
+; RV64IMZBB-NEXT: neg a0, a0
+; RV64IMZBB-NEXT: ori a0, a0, 32
+; RV64IMZBB-NEXT: minu a0, a1, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umin_3:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltiu a2, a1, 32
-; RV32IMZICOND-NEXT: addi a3, a1, -32
-; RV32IMZICOND-NEXT: czero.eqz a2, a3, a2
-; RV32IMZICOND-NEXT: addi a2, a2, 32
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV32IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT: neg a0, a0
+; RV32IMZICOND-NEXT: ori a0, a0, 32
+; RV32IMZICOND-NEXT: sltu a2, a1, a0
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2
; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umin_3:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a2, a1
-; RV64IMZICOND-NEXT: sltiu a3, a2, 32
-; RV64IMZICOND-NEXT: addi a2, a2, -32
-; RV64IMZICOND-NEXT: czero.eqz a2, a2, a3
-; RV64IMZICOND-NEXT: addi a2, a2, 32
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT: sext.w a1, a1
+; RV64IMZICOND-NEXT: neg a0, a0
+; RV64IMZICOND-NEXT: ori a0, a0, 32
+; RV64IMZICOND-NEXT: sltu a2, a1, a0
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2
; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umin_3:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: li a2, 32
-; RV32IMBOTH-NEXT: minu a2, a1, a2
-; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV32IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV32IMBOTH-NEXT: or a0, a1, a0
+; RV32IMBOTH-NEXT: neg a0, a0
+; RV32IMBOTH-NEXT: ori a0, a0, 32
+; RV32IMBOTH-NEXT: minu a0, a1, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umin_3:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a2, a1
-; RV64IMBOTH-NEXT: li a3, 32
-; RV64IMBOTH-NEXT: minu a2, a2, a3
-; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV64IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV64IMBOTH-NEXT: or a0, a1, a0
+; RV64IMBOTH-NEXT: sext.w a1, a1
+; RV64IMBOTH-NEXT: neg a0, a0
+; RV64IMBOTH-NEXT: ori a0, a0, 32
+; RV64IMBOTH-NEXT: minu a0, a1, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umin(i32 %a, i32 32)
@@ -316,94 +260,80 @@ entry:
define i32 @select_umin_4(i1 zeroext %cond, i32 %x) {
; RV32IM-LABEL: select_umin_4:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: li a2, 128
-; RV32IM-NEXT: bgeu a1, a2, .LBB3_3
+; RV32IM-NEXT: neg a0, a0
+; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: li a1, 128
+; RV32IM-NEXT: bltu a0, a1, .LBB3_2
; RV32IM-NEXT: # %bb.1:
-; RV32IM-NEXT: beqz a0, .LBB3_4
+; RV32IM-NEXT: li a0, 128
; RV32IM-NEXT: .LBB3_2:
-; RV32IM-NEXT: mv a0, a2
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB3_3:
-; RV32IM-NEXT: li a1, 128
-; RV32IM-NEXT: bnez a0, .LBB3_2
-; RV32IM-NEXT: .LBB3_4:
-; RV32IM-NEXT: mv a0, a1
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umin_4:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a2, a1
+; RV64IM-NEXT: neg a0, a0
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: sext.w a0, a0
; RV64IM-NEXT: li a1, 128
-; RV64IM-NEXT: bgeu a2, a1, .LBB3_3
+; RV64IM-NEXT: bltu a0, a1, .LBB3_2
; RV64IM-NEXT: # %bb.1:
-; RV64IM-NEXT: beqz a0, .LBB3_4
+; RV64IM-NEXT: li a0, 128
; RV64IM-NEXT: .LBB3_2:
-; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB3_3:
-; RV64IM-NEXT: li a2, 128
-; RV64IM-NEXT: bnez a0, .LBB3_2
-; RV64IM-NEXT: .LBB3_4:
-; RV64IM-NEXT: mv a0, a2
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umin_4:
; RV32IMZBB: # %bb.0:
-; RV32IMZBB-NEXT: mv a2, a0
-; RV32IMZBB-NEXT: li a0, 128
-; RV32IMZBB-NEXT: bnez a2, .LBB3_2
-; RV32IMZBB-NEXT: # %bb.1:
-; RV32IMZBB-NEXT: minu a0, a1, a0
-; RV32IMZBB-NEXT: .LBB3_2:
+; RV32IMZBB-NEXT: neg a0, a0
+; RV32IMZBB-NEXT: or a0, a0, a1
+; RV32IMZBB-NEXT: li a1, 128
+; RV32IMZBB-NEXT: minu a0, a0, a1
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umin_4:
; RV64IMZBB: # %bb.0:
-; RV64IMZBB-NEXT: mv a2, a0
-; RV64IMZBB-NEXT: li a0, 128
-; RV64IMZBB-NEXT: bnez a2, .LBB3_2
-; RV64IMZBB-NEXT: # %bb.1:
-; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: minu a0, a1, a0
-; RV64IMZBB-NEXT: .LBB3_2:
+; RV64IMZBB-NEXT: neg a0, a0
+; RV64IMZBB-NEXT: or a0, a0, a1
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: li a1, 128
+; RV64IMZBB-NEXT: minu a0, a0, a1
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umin_4:
; RV32IMZICOND: # %bb.0:
-; RV32IMZICOND-NEXT: sltiu a2, a1, 128
-; RV32IMZICOND-NEXT: addi a1, a1, -128
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2
-; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: neg a0, a0
+; RV32IMZICOND-NEXT: or a0, a0, a1
+; RV32IMZICOND-NEXT: sltiu a1, a0, 128
+; RV32IMZICOND-NEXT: addi a0, a0, -128
+; RV32IMZICOND-NEXT: czero.eqz a0, a0, a1
; RV32IMZICOND-NEXT: addi a0, a0, 128
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umin_4:
; RV64IMZICOND: # %bb.0:
-; RV64IMZICOND-NEXT: sext.w a1, a1
-; RV64IMZICOND-NEXT: sltiu a2, a1, 128
-; RV64IMZICOND-NEXT: addi a1, a1, -128
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2
-; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: neg a0, a0
+; RV64IMZICOND-NEXT: or a0, a0, a1
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltiu a1, a0, 128
+; RV64IMZICOND-NEXT: addi a0, a0, -128
+; RV64IMZICOND-NEXT: czero.eqz a0, a0, a1
; RV64IMZICOND-NEXT: addi a0, a0, 128
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umin_4:
; RV32IMBOTH: # %bb.0:
-; RV32IMBOTH-NEXT: li a2, 128
-; RV32IMBOTH-NEXT: minu a1, a1, a2
-; RV32IMBOTH-NEXT: addi a1, a1, -128
-; RV32IMBOTH-NEXT: czero.nez a0, a1, a0
-; RV32IMBOTH-NEXT: addi a0, a0, 128
+; RV32IMBOTH-NEXT: neg a0, a0
+; RV32IMBOTH-NEXT: or a0, a0, a1
+; RV32IMBOTH-NEXT: li a1, 128
+; RV32IMBOTH-NEXT: minu a0, a0, a1
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umin_4:
; RV64IMBOTH: # %bb.0:
-; RV64IMBOTH-NEXT: sext.w a1, a1
-; RV64IMBOTH-NEXT: li a2, 128
-; RV64IMBOTH-NEXT: minu a1, a1, a2
-; RV64IMBOTH-NEXT: addi a1, a1, -128
-; RV64IMBOTH-NEXT: czero.nez a0, a1, a0
-; RV64IMBOTH-NEXT: addi a0, a0, 128
+; RV64IMBOTH-NEXT: neg a0, a0
+; RV64IMBOTH-NEXT: or a0, a0, a1
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: li a1, 128
+; RV64IMBOTH-NEXT: minu a0, a0, a1
; RV64IMBOTH-NEXT: ret
%minmax = call i32 @llvm.umin(i32 %x, i32 128)
%sel = select i1 %cond, i32 128, i32 %minmax
@@ -413,96 +343,76 @@ define i32 @select_umin_4(i1 zeroext %cond, i32 %x) {
define i32 @select_umax_1(i1 zeroext %cond, i32 %a, i32 %b) {
; RV32IM-LABEL: select_umax_1:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: bgeu a2, a1, .LBB4_3
+; RV32IM-NEXT: neg a0, a0
+; RV32IM-NEXT: and a1, a0, a1
+; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: bltu a1, a2, .LBB4_2
; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB4_4
-; RV32IM-NEXT: .LBB4_2: # %entry
; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB4_3: # %entry
-; RV32IM-NEXT: mv a1, a2
-; RV32IM-NEXT: bnez a0, .LBB4_2
-; RV32IM-NEXT: .LBB4_4: # %entry
-; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: .LBB4_2: # %entry
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umax_1:
; RV64IM: # %bb.0: # %entry
+; RV64IM-NEXT: mv a3, a0
+; RV64IM-NEXT: sext.w a0, a2
+; RV64IM-NEXT: neg a2, a3
+; RV64IM-NEXT: and a1, a2, a1
; RV64IM-NEXT: sext.w a1, a1
-; RV64IM-NEXT: sext.w a3, a2
-; RV64IM-NEXT: bgeu a3, a1, .LBB4_3
+; RV64IM-NEXT: bltu a1, a0, .LBB4_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB4_4
-; RV64IM-NEXT: .LBB4_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB4_3: # %entry
-; RV64IM-NEXT: mv a1, a3
-; RV64IM-NEXT: bnez a0, .LBB4_2
-; RV64IM-NEXT: .LBB4_4: # %entry
-; RV64IM-NEXT: mv a0, a2
+; RV64IM-NEXT: .LBB4_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umax_1:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: beqz a0, .LBB4_2
-; RV32IMZBB-NEXT: # %bb.1:
-; RV32IMZBB-NEXT: maxu a2, a1, a2
-; RV32IMZBB-NEXT: .LBB4_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a2
+; RV32IMZBB-NEXT: neg a0, a0
+; RV32IMZBB-NEXT: and a0, a0, a1
+; RV32IMZBB-NEXT: maxu a0, a2, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umax_1:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: beqz a0, .LBB4_2
-; RV64IMZBB-NEXT: # %bb.1:
; RV64IMZBB-NEXT: sext.w a2, a2
-; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: maxu a2, a1, a2
-; RV64IMZBB-NEXT: .LBB4_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a2
+; RV64IMZBB-NEXT: neg a0, a0
+; RV64IMZBB-NEXT: and a0, a0, a1
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: maxu a0, a2, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umax_1:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltu a3, a2, a1
-; RV32IMZICOND-NEXT: czero.nez a4, a2, a3
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3
-; RV32IMZICOND-NEXT: or a1, a1, a4
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV32IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT: sltu a1, a0, a2
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a1
+; RV32IMZICOND-NEXT: czero.eqz a1, a2, a1
; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umax_1:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a1, a1
-; RV64IMZICOND-NEXT: sext.w a3, a2
-; RV64IMZICOND-NEXT: sltu a4, a3, a1
-; RV64IMZICOND-NEXT: czero.nez a3, a3, a4
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4
-; RV64IMZICOND-NEXT: or a1, a1, a3
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT: sext.w a2, a2
+; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltu a1, a0, a2
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a1
+; RV64IMZICOND-NEXT: czero.eqz a1, a2, a1
; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umax_1:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: maxu a1, a1, a2
-; RV32IMBOTH-NEXT: czero.nez a2, a2, a0
; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0
-; RV32IMBOTH-NEXT: or a0, a0, a2
+; RV32IMBOTH-NEXT: maxu a0, a2, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umax_1:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a3, a2
-; RV64IMBOTH-NEXT: sext.w a1, a1
-; RV64IMBOTH-NEXT: maxu a1, a1, a3
-; RV64IMBOTH-NEXT: czero.nez a2, a2, a0
+; RV64IMBOTH-NEXT: sext.w a2, a2
; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0
-; RV64IMBOTH-NEXT: or a0, a0, a2
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: maxu a0, a2, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umax(i32 %a, i32 %b)
@@ -513,97 +423,76 @@ entry:
define i32 @select_umax_2(i1 zeroext %cond, i32 %a, i32 %b) {
; RV32IM-LABEL: select_umax_2:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: mv a3, a1
-; RV32IM-NEXT: bgeu a2, a1, .LBB5_3
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: mv a0, a1
+; RV32IM-NEXT: bltu a2, a1, .LBB5_2
; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB5_4
+; RV32IM-NEXT: mv a0, a2
; RV32IM-NEXT: .LBB5_2: # %entry
-; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB5_3: # %entry
-; RV32IM-NEXT: mv a3, a2
-; RV32IM-NEXT: bnez a0, .LBB5_2
-; RV32IM-NEXT: .LBB5_4: # %entry
-; RV32IM-NEXT: mv a0, a3
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umax_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: sext.w a3, a1
-; RV64IM-NEXT: sext.w a2, a2
-; RV64IM-NEXT: bgeu a2, a3, .LBB5_3
+; RV64IM-NEXT: mv a3, a0
+; RV64IM-NEXT: sext.w a0, a1
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a1, a3, a2
+; RV64IM-NEXT: sext.w a1, a1
+; RV64IM-NEXT: bltu a1, a0, .LBB5_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB5_4
-; RV64IM-NEXT: .LBB5_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB5_3: # %entry
-; RV64IM-NEXT: mv a3, a2
-; RV64IM-NEXT: bnez a0, .LBB5_2
-; RV64IM-NEXT: .LBB5_4: # %entry
-; RV64IM-NEXT: mv a0, a3
+; RV64IM-NEXT: .LBB5_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umax_2:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: bnez a0, .LBB5_2
-; RV32IMZBB-NEXT: # %bb.1: # %entry
-; RV32IMZBB-NEXT: maxu a1, a1, a2
-; RV32IMZBB-NEXT: .LBB5_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a1
+; RV32IMZBB-NEXT: addi a0, a0, -1
+; RV32IMZBB-NEXT: and a0, a0, a2
+; RV32IMZBB-NEXT: maxu a0, a1, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umax_2:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: bnez a0, .LBB5_2
-; RV64IMZBB-NEXT: # %bb.1: # %entry
-; RV64IMZBB-NEXT: sext.w a2, a2
; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: maxu a1, a1, a2
-; RV64IMZBB-NEXT: .LBB5_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a1
+; RV64IMZBB-NEXT: addi a0, a0, -1
+; RV64IMZBB-NEXT: and a0, a0, a2
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: maxu a0, a1, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umax_2:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltu a3, a2, a1
-; RV32IMZICOND-NEXT: czero.nez a2, a2, a3
-; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3
-; RV32IMZICOND-NEXT: or a2, a3, a2
-; RV32IMZICOND-NEXT: czero.nez a2, a2, a0
-; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0
-; RV32IMZICOND-NEXT: or a0, a0, a2
+; RV32IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT: sltu a2, a0, a1
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2
+; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umax_2:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a3, a1
-; RV64IMZICOND-NEXT: sext.w a2, a2
-; RV64IMZICOND-NEXT: sltu a4, a2, a3
-; RV64IMZICOND-NEXT: czero.nez a2, a2, a4
-; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4
-; RV64IMZICOND-NEXT: or a2, a3, a2
-; RV64IMZICOND-NEXT: czero.nez a2, a2, a0
-; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0
-; RV64IMZICOND-NEXT: or a0, a0, a2
+; RV64IMZICOND-NEXT: sext.w a1, a1
+; RV64IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltu a2, a0, a1
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2
+; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umax_2:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: maxu a2, a1, a2
-; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0
; RV32IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV32IMBOTH-NEXT: or a0, a1, a0
+; RV32IMBOTH-NEXT: maxu a0, a1, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umax_2:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a2, a2
-; RV64IMBOTH-NEXT: sext.w a3, a1
-; RV64IMBOTH-NEXT: maxu a2, a3, a2
-; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT: sext.w a1, a1
; RV64IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV64IMBOTH-NEXT: or a0, a1, a0
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: maxu a0, a1, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umax(i32 %a, i32 %b)
@@ -614,99 +503,76 @@ entry:
define i32 @select_umax_3(i1 zeroext %cond, i32 %a) {
; RV32IM-LABEL: select_umax_3:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: li a3, 32
-; RV32IM-NEXT: mv a2, a1
-; RV32IM-NEXT: bgeu a3, a1, .LBB6_3
-; RV32IM-NEXT: # %bb.1: # %entry
-; RV32IM-NEXT: beqz a0, .LBB6_4
-; RV32IM-NEXT: .LBB6_2: # %entry
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: andi a2, a0, 32
; RV32IM-NEXT: mv a0, a1
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB6_3: # %entry
-; RV32IM-NEXT: li a2, 32
-; RV32IM-NEXT: bnez a0, .LBB6_2
-; RV32IM-NEXT: .LBB6_4: # %entry
+; RV32IM-NEXT: bltu a2, a1, .LBB6_2
+; RV32IM-NEXT: # %bb.1: # %entry
; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: .LBB6_2: # %entry
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umax_3:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: sext.w a2, a1
-; RV64IM-NEXT: li a3, 32
-; RV64IM-NEXT: bgeu a3, a2, .LBB6_3
+; RV64IM-NEXT: mv a2, a0
+; RV64IM-NEXT: sext.w a0, a1
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: andi a1, a2, 32
+; RV64IM-NEXT: bltu a1, a0, .LBB6_2
; RV64IM-NEXT: # %bb.1: # %entry
-; RV64IM-NEXT: beqz a0, .LBB6_4
-; RV64IM-NEXT: .LBB6_2: # %entry
; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB6_3: # %entry
-; RV64IM-NEXT: li a2, 32
-; RV64IM-NEXT: bnez a0, .LBB6_2
-; RV64IM-NEXT: .LBB6_4: # %entry
-; RV64IM-NEXT: mv a0, a2
+; RV64IM-NEXT: .LBB6_2: # %entry
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umax_3:
; RV32IMZBB: # %bb.0: # %entry
-; RV32IMZBB-NEXT: bnez a0, .LBB6_2
-; RV32IMZBB-NEXT: # %bb.1: # %entry
-; RV32IMZBB-NEXT: li a0, 32
-; RV32IMZBB-NEXT: maxu a1, a1, a0
-; RV32IMZBB-NEXT: .LBB6_2: # %entry
-; RV32IMZBB-NEXT: mv a0, a1
+; RV32IMZBB-NEXT: addi a0, a0, -1
+; RV32IMZBB-NEXT: andi a0, a0, 32
+; RV32IMZBB-NEXT: maxu a0, a1, a0
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umax_3:
; RV64IMZBB: # %bb.0: # %entry
-; RV64IMZBB-NEXT: bnez a0, .LBB6_2
-; RV64IMZBB-NEXT: # %bb.1: # %entry
; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: li a0, 32
-; RV64IMZBB-NEXT: maxu a1, a1, a0
-; RV64IMZBB-NEXT: .LBB6_2: # %entry
-; RV64IMZBB-NEXT: mv a0, a1
+; RV64IMZBB-NEXT: addi a0, a0, -1
+; RV64IMZBB-NEXT: andi a0, a0, 32
+; RV64IMZBB-NEXT: maxu a0, a1, a0
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umax_3:
; RV32IMZICOND: # %bb.0: # %entry
-; RV32IMZICOND-NEXT: sltiu a2, a1, 33
-; RV32IMZICOND-NEXT: addi a3, a1, -32
-; RV32IMZICOND-NEXT: czero.nez a2, a3, a2
-; RV32IMZICOND-NEXT: addi a2, a2, 32
-; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV32IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT: addi a0, a0, -1
+; RV32IMZICOND-NEXT: andi a0, a0, 32
+; RV32IMZICOND-NEXT: sltu a2, a0, a1
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2
; RV32IMZICOND-NEXT: or a0, a1, a0
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umax_3:
; RV64IMZICOND: # %bb.0: # %entry
-; RV64IMZICOND-NEXT: sext.w a2, a1
-; RV64IMZICOND-NEXT: sltiu a3, a2, 33
-; RV64IMZICOND-NEXT: addi a2, a2, -32
-; RV64IMZICOND-NEXT: czero.nez a2, a2, a3
-; RV64IMZICOND-NEXT: addi a2, a2, 32
-; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64IMZICOND-NEXT: czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT: sext.w a1, a1
+; RV64IMZICOND-NEXT: addi a0, a0, -1
+; RV64IMZICOND-NEXT: andi a0, a0, 32
+; RV64IMZICOND-NEXT: sltu a2, a0, a1
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a2
+; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2
; RV64IMZICOND-NEXT: or a0, a1, a0
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umax_3:
; RV32IMBOTH: # %bb.0: # %entry
-; RV32IMBOTH-NEXT: li a2, 32
-; RV32IMBOTH-NEXT: maxu a2, a1, a2
-; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV32IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV32IMBOTH-NEXT: or a0, a1, a0
+; RV32IMBOTH-NEXT: addi a0, a0, -1
+; RV32IMBOTH-NEXT: andi a0, a0, 32
+; RV32IMBOTH-NEXT: maxu a0, a1, a0
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umax_3:
; RV64IMBOTH: # %bb.0: # %entry
-; RV64IMBOTH-NEXT: sext.w a2, a1
-; RV64IMBOTH-NEXT: li a3, 32
-; RV64IMBOTH-NEXT: maxu a2, a2, a3
-; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0
-; RV64IMBOTH-NEXT: czero.nez a0, a2, a0
-; RV64IMBOTH-NEXT: or a0, a1, a0
+; RV64IMBOTH-NEXT: sext.w a1, a1
+; RV64IMBOTH-NEXT: addi a0, a0, -1
+; RV64IMBOTH-NEXT: andi a0, a0, 32
+; RV64IMBOTH-NEXT: maxu a0, a1, a0
; RV64IMBOTH-NEXT: ret
entry:
%c = call i32 @llvm.umax(i32 %a, i32 32)
@@ -717,94 +583,76 @@ entry:
define i32 @select_umax_4(i1 zeroext %cond, i32 %x) {
; RV32IM-LABEL: select_umax_4:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: li a2, 128
-; RV32IM-NEXT: bgeu a2, a1, .LBB7_3
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: li a1, 128
+; RV32IM-NEXT: bltu a1, a0, .LBB7_2
; RV32IM-NEXT: # %bb.1:
-; RV32IM-NEXT: beqz a0, .LBB7_4
+; RV32IM-NEXT: li a0, 128
; RV32IM-NEXT: .LBB7_2:
-; RV32IM-NEXT: mv a0, a2
-; RV32IM-NEXT: ret
-; RV32IM-NEXT: .LBB7_3:
-; RV32IM-NEXT: li a1, 128
-; RV32IM-NEXT: bnez a0, .LBB7_2
-; RV32IM-NEXT: .LBB7_4:
-; RV32IM-NEXT: mv a0, a1
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: select_umax_4:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a2, a1
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: and a0, a0, a1
+; RV64IM-NEXT: sext.w a0, a0
; RV64IM-NEXT: li a1, 128
-; RV64IM-NEXT: bgeu a1, a2, .LBB7_3
+; RV64IM-NEXT: bltu a1, a0, .LBB7_2
; RV64IM-NEXT: # %bb.1:
-; RV64IM-NEXT: beqz a0, .LBB7_4
+; RV64IM-NEXT: li a0, 128
; RV64IM-NEXT: .LBB7_2:
-; RV64IM-NEXT: mv a0, a1
-; RV64IM-NEXT: ret
-; RV64IM-NEXT: .LBB7_3:
-; RV64IM-NEXT: li a2, 128
-; RV64IM-NEXT: bnez a0, .LBB7_2
-; RV64IM-NEXT: .LBB7_4:
-; RV64IM-NEXT: mv a0, a2
; RV64IM-NEXT: ret
;
; RV32IMZBB-LABEL: select_umax_4:
; RV32IMZBB: # %bb.0:
-; RV32IMZBB-NEXT: mv a2, a0
-; RV32IMZBB-NEXT: li a0, 128
-; RV32IMZBB-NEXT: bnez a2, .LBB7_2
-; RV32IMZBB-NEXT: # %bb.1:
-; RV32IMZBB-NEXT: maxu a0, a1, a0
-; RV32IMZBB-NEXT: .LBB7_2:
+; RV32IMZBB-NEXT: addi a0, a0, -1
+; RV32IMZBB-NEXT: and a0, a0, a1
+; RV32IMZBB-NEXT: li a1, 128
+; RV32IMZBB-NEXT: maxu a0, a0, a1
; RV32IMZBB-NEXT: ret
;
; RV64IMZBB-LABEL: select_umax_4:
; RV64IMZBB: # %bb.0:
-; RV64IMZBB-NEXT: mv a2, a0
-; RV64IMZBB-NEXT: li a0, 128
-; RV64IMZBB-NEXT: bnez a2, .LBB7_2
-; RV64IMZBB-NEXT: # %bb.1:
-; RV64IMZBB-NEXT: sext.w a1, a1
-; RV64IMZBB-NEXT: maxu a0, a1, a0
-; RV64IMZBB-NEXT: .LBB7_2:
+; RV64IMZBB-NEXT: addi a0, a0, -1
+; RV64IMZBB-NEXT: and a0, a0, a1
+; RV64IMZBB-NEXT: sext.w a0, a0
+; RV64IMZBB-NEXT: li a1, 128
+; RV64IMZBB-NEXT: maxu a0, a0, a1
; RV64IMZBB-NEXT: ret
;
; RV32IMZICOND-LABEL: select_umax_4:
; RV32IMZICOND: # %bb.0:
-; RV32IMZICOND-NEXT: sltiu a2, a1, 129
-; RV32IMZICOND-NEXT: addi a1, a1, -128
-; RV32IMZICOND-NEXT: czero.nez a1, a1, a2
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: sltiu a1, a0, 129
+; RV32IMZICOND-NEXT: addi a0, a0, -128
+; RV32IMZICOND-NEXT: czero.nez a0, a0, a1
; RV32IMZICOND-NEXT: addi a0, a0, 128
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_umax_4:
; RV64IMZICOND: # %bb.0:
-; RV64IMZICOND-NEXT: sext.w a1, a1
-; RV64IMZICOND-NEXT: sltiu a2, a1, 129
-; RV64IMZICOND-NEXT: addi a1, a1, -128
-; RV64IMZICOND-NEXT: czero.nez a1, a1, a2
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: sext.w a0, a0
+; RV64IMZICOND-NEXT: sltiu a1, a0, 129
+; RV64IMZICOND-NEXT: addi a0, a0, -128
+; RV64IMZICOND-NEXT: czero.nez a0, a0, a1
; RV64IMZICOND-NEXT: addi a0, a0, 128
; RV64IMZICOND-NEXT: ret
;
; RV32IMBOTH-LABEL: select_umax_4:
; RV32IMBOTH: # %bb.0:
-; RV32IMBOTH-NEXT: li a2, 128
-; RV32IMBOTH-NEXT: maxu a1, a1, a2
-; RV32IMBOTH-NEXT: addi a1, a1, -128
; RV32IMBOTH-NEXT: czero.nez a0, a1, a0
-; RV32IMBOTH-NEXT: addi a0, a0, 128
+; RV32IMBOTH-NEXT: li a1, 128
+; RV32IMBOTH-NEXT: maxu a0, a0, a1
; RV32IMBOTH-NEXT: ret
;
; RV64IMBOTH-LABEL: select_umax_4:
; RV64IMBOTH: # %bb.0:
-; RV64IMBOTH-NEXT: sext.w a1, a1
-; RV64IMBOTH-NEXT: li a2, 128
-; RV64IMBOTH-NEXT: maxu a1, a1, a2
-; RV64IMBOTH-NEXT: addi a1, a1, -128
; RV64IMBOTH-NEXT: czero.nez a0, a1, a0
-; RV64IMBOTH-NEXT: addi a0, a0, 128
+; RV64IMBOTH-NEXT: sext.w a0, a0
+; RV64IMBOTH-NEXT: li a1, 128
+; RV64IMBOTH-NEXT: maxu a0, a0, a1
; RV64IMBOTH-NEXT: ret
%minmax = call i32 @llvm.umax(i32 %x, i32 128)
%sel = select i1 %cond, i32 128, i32 %minmax
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index 59a702a..1bfeeb9 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -2075,14 +2075,14 @@ define i64 @abs_i64(i64 %x) {
; RV32SFB-LABEL: abs_i64:
; RV32SFB: # %bb.0:
; RV32SFB-NEXT: snez a2, a0
-; RV32SFB-NEXT: add a2, a2, a1
+; RV32SFB-NEXT: neg a3, a1
; RV32SFB-NEXT: bgez a1, .LBB35_2
; RV32SFB-NEXT: # %bb.1:
; RV32SFB-NEXT: neg a0, a0
; RV32SFB-NEXT: .LBB35_2:
; RV32SFB-NEXT: bgez a1, .LBB35_4
; RV32SFB-NEXT: # %bb.3:
-; RV32SFB-NEXT: neg a1, a2
+; RV32SFB-NEXT: sub a1, a3, a2
; RV32SFB-NEXT: .LBB35_4:
; RV32SFB-NEXT: ret
%a = call i64 @llvm.abs.i64(i64 %x, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
index d25ff284..b2cebab 100644
--- a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
@@ -229,3 +229,50 @@ define i32 @test.llvm.bitrev(i32 %a) {
%1 = call i32 @llvm.bitreverse(i32 %a)
ret i32 %1
}
+
+define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
+; CHECK-LABEL: ctpop_i32_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cv.cnt a0, a0
+; CHECK-NEXT: sltiu a0, a0, 2
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ %2 = icmp ult i32 %1, 2
+ ret i1 %2
+}
+
+define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind {
+; CHECK-LABEL: ctpop_i32_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cv.cnt a0, a0
+; CHECK-NEXT: sltiu a0, a0, 2
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ %2 = icmp ugt i32 %1, 1
+ ret i1 %2
+}
+
+define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
+; CHECK-LABEL: ctpop_i32_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cv.cnt a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ %2 = icmp eq i32 %1, 1
+ ret i1 %2
+}
+
+define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
+; CHECK-LABEL: ctpop_i32_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cv.cnt a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ %2 = icmp ne i32 %1, 1
+ ret i1 %2
+}
diff --git a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
index c6ffb92..8e570f0 100644
--- a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
@@ -25,7 +25,7 @@ define internal i32 @table_switch(i32 %x) "branch-target-enforcement" {
; CHECK-NEXT: movs r0, #3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_5: @ %bb4
-; CHECK-NEXT: movs r0, #4
+; CHECK-NEXT: movs r0, #5
; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_6: @ %sw.epilog
; CHECK-NEXT: movs r0, #0
@@ -51,7 +51,7 @@ sw.epilog:
br label %return
return:
- %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ]
+ %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 5, %bb4 ]
ret i32 %ret
}
diff --git a/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll b/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll
new file mode 100644
index 0000000..47ea762
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll
@@ -0,0 +1,609 @@
+; RUN: opt -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s
+; RUN: opt -mattr=+simd128 -passes=loop-vectorize -vectorizer-maximize-bandwidth %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s --check-prefix=MAX-BANDWIDTH
+
+target triple = "wasm32"
+
+define hidden i32 @accumulate_add_u8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_u8_u8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010
+ %1 = load i8, ptr %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_add_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_s8_s8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010
+ %1 = load i8, ptr %arrayidx1, align 1
+ %conv2 = sext i8 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_add_s8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_s8_u8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010
+ %1 = load i8, ptr %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_add_s8_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_s8_s16:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+; CHECK: i32x4.load16x4_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010
+ %1 = load i16, ptr %arrayidx1, align 2
+ %conv2 = sext i16 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_shr_u8(ptr noundef readonly %a, i32 noundef %N) {
+; CHECK-LABEL: accumulate_shr_u8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i8x16.shr_u
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.shr_u
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp4.not = icmp eq i32 %N, 0
+ br i1 %cmp4.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.06
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = lshr i8 %0, 1
+ %shr = zext nneg i8 %1 to i32
+ %add = add i32 %result.05, %shr
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_shr_s8(ptr noundef readonly %a, i32 noundef %N) {
+; CHECK-LABEL: accumulate_shr_s8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i8x16.shr_s
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.shr_s
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp4.not = icmp eq i32 %N, 0
+ br i1 %cmp4.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.06
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = ashr i8 %0, 1
+ %shr = sext i8 %1 to i32
+ %add = add nsw i32 %result.05, %shr
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_max_u8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_max_u8_u8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: v128.load32_zero
+; CHECK: i8x16.max_u
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.max_u
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp17.not = icmp eq i32 %N, 0
+ br i1 %cmp17.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.019 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.018 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.019
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.019
+ %1 = load i8, ptr %arrayidx1, align 1
+ %. = tail call i8 @llvm.umax.i8(i8 %0, i8 %1)
+ %cond = zext i8 %. to i32
+ %add = add i32 %result.018, %cond
+ %inc = add nuw i32 %i.019, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_min_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_min_s8_s8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: v128.load32_zero
+; CHECK: i8x16.min_s
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.min_s
+; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp17.not = icmp eq i32 %N, 0
+ br i1 %cmp17.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.019 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.018 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.019
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.019
+ %1 = load i8, ptr %arrayidx1, align 1
+ %. = tail call i8 @llvm.smin.i8(i8 %0, i8 %1)
+ %cond = sext i8 %. to i32
+ %add = add nsw i32 %result.018, %cond
+ %inc = add nuw i32 %i.019, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_add_u16_u16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_u16_u16:
+; CHECK: loop
+; CHECK: i32x4.load16x4_u
+; CHECK: i32x4.add
+; CHECK: i32x4.load16x4_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.010
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv = zext i16 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010
+ %1 = load i16, ptr %arrayidx1, align 2
+ %conv2 = zext i16 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_add_s16_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_add_s16_s16:
+; CHECK: loop
+; CHECK: i32x4.load16x4_s
+; CHECK: i32x4.add
+; CHECK: i32x4.load16x4_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.010
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010
+ %1 = load i16, ptr %arrayidx1, align 2
+ %conv2 = sext i16 %1 to i32
+ %add = add i32 %result.09, %conv
+ %add3 = add i32 %add, %conv2
+ %inc = add nuw i32 %i.010, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_shr_u16(ptr noundef readonly %a, i32 noundef %N) {
+; CHECK-LABEL: accumulate_shr_u16:
+; CHECK: loop
+; CHECK: v128.load64_zero
+; CHECK: i16x8.shr_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.shr_u
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp4.not = icmp eq i32 %N, 0
+ br i1 %cmp4.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.06
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = lshr i16 %0, 1
+ %shr = zext nneg i16 %1 to i32
+ %add = add i32 %result.05, %shr
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_shr_s16(ptr noundef readonly %a, i32 noundef %N) {
+; CHECK-LABEL: accumulate_shr_s16:
+; CHECK: loop
+; CHECK: v128.load64_zero
+; CHECK: i16x8.shr_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i16x8.shr_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+entry:
+ %cmp4.not = icmp eq i32 %N, 0
+ br i1 %cmp4.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.06
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = ashr i16 %0, 1
+ %shr = sext i16 %1 to i32
+ %add = add nsw i32 %result.05, %shr
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_sub_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_sub_s8_s8:
+; CHECK: loop
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.add
+; CHECK: v128.load32_zero
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: i32x4.sub
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.sub
+; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.sub
+; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.sub
+entry:
+ %cmp7.not = icmp eq i32 %N, 0
+ br i1 %cmp7.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.09
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.09
+ %1 = load i8, ptr %arrayidx1, align 1
+ %conv2 = sext i8 %1 to i32
+ %sub = add i32 %result.08, %conv
+ %add = sub i32 %sub, %conv2
+ %inc = add nuw i32 %i.09, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden i32 @accumulate_sub_s16_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: accumulate_sub_s16_s16:
+; CHECK: loop
+; CHECK: i32x4.load16x4_s
+; CHECK: i32x4.add
+; CHECK: i32x4.load16x4_s
+; CHECK: i32x4.sub
+
+; MAX-BANDWIDTH: loop
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: v128.load
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
+; MAX-BANDWIDTH: i32x4.sub
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.sub
+entry:
+ %cmp7.not = icmp eq i32 %N, 0
+ br i1 %cmp7.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %result.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.09
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.09
+ %1 = load i16, ptr %arrayidx1, align 2
+ %conv2 = sext i16 %1 to i32
+ %sub = add i32 %result.08, %conv
+ %add = sub i32 %sub, %conv2
+ %inc = add nuw i32 %i.09, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare i8 @llvm.umax.i8(i8, i8)
+
+declare i8 @llvm.smin.i8(i8, i8)
diff --git a/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll
new file mode 100644
index 0000000..cb7c61d
--- /dev/null
+++ b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll
@@ -0,0 +1,65 @@
+;; Test RISC-V 32 bit:
+; RUN: llc -emit-call-site-info -stop-after=livedebugvalues -mtriple=riscv32-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK32
+
+;; Built from source:
+;; extern long fn1(long,long,long);
+;; long fn2(long a, long b, long c) {
+;; long local = fn1(a+b, c, b+10);
+;; if (local > 10)
+;; return local + 10;
+;; return b;
+;; }
+;; Using command:
+;; clang -g -O2 -target riscv32-linux-gnu m.c -c -S -emit-llvm
+;; Confirm that info from callSites attribute is used as entry_value in DIExpression.
+
+;; Test riscv32:
+; CHECK32: $x10 = nsw ADD $x11, killed renamable $x10
+; CHECK32-NEXT: DBG_VALUE $x10, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1)
+
+; ModuleID = 'm.c'
+source_filename = "m.c"
+target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
+target triple = "riscv32-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @fn2(i32 noundef %a, i32 noundef %b, i32 noundef %c) !dbg !14 {
+entry:
+ #dbg_value(i32 %a, !20, !DIExpression(), !23)
+ #dbg_value(i32 %b, !21, !DIExpression(), !23)
+ #dbg_value(i32 %c, !22, !DIExpression(), !23)
+ %add = add nsw i32 %b, %a
+ %add1 = add nsw i32 %b, 10
+ %call = tail call i32 @fn1(i32 noundef %add, i32 noundef %c, i32 noundef %add1)
+ #dbg_value(i32 %call, !22, !DIExpression(), !23)
+ %cmp = icmp sgt i32 %call, 10
+ %add2 = add nuw nsw i32 %call, 10
+ %retval.0 = select i1 %cmp, i32 %add2, i32 %b
+ ret i32 %retval.0, !dbg !29
+}
+
+declare !dbg !30 i32 @fn1(i32 noundef, i32 noundef, i32 noundef)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "m.c", directory: ".")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{!"clang"}
+!14 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 2, type: !15, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!17, !17, !17, !17}
+!17 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
+!18 = !{!19, !20, !21, !22}
+!19 = !DILocalVariable(name: "a", arg: 1, scope: !14, file: !1, line: 2, type: !17)
+!20 = !DILocalVariable(name: "b", arg: 2, scope: !14, file: !1, line: 2, type: !17)
+!21 = !DILocalVariable(name: "c", arg: 3, scope: !14, file: !1, line: 2, type: !17)
+!22 = !DILocalVariable(name: "local", scope: !14, file: !1, line: 3, type: !17)
+!23 = !DILocation(line: 0, scope: !14)
+!29 = !DILocation(line: 7, column: 1, scope: !14)
+!30 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !15, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+
diff --git a/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll
new file mode 100644
index 0000000..cd6a765
--- /dev/null
+++ b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll
@@ -0,0 +1,65 @@
+;; Test RISC-V 64 bit:
+; RUN: llc -emit-call-site-info -stop-after=livedebugvalues -mtriple=riscv64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK64
+
+;; Built from source:
+;; extern long fn1(long,long,long);
+;; long fn2(long a, long b, long c) {
+;; long local = fn1(a+b, c, b+10);
+;; if (local > 10)
+;; return local + 10;
+;; return b;
+;; }
+;; Using command:
+;; clang -g -O2 -target riscv64-linux-gnu m.c -c -S -emit-llvm
+;; Confirm that info from callSites attribute is used as entry_value in DIExpression.
+
+;; Test riscv64:
+; CHECK64: $x10 = nsw ADD $x11, killed renamable $x10
+; CHECK64-NEXT: DBG_VALUE $x10, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1)
+
+; ModuleID = 'm.c'
+source_filename = "m.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i64 @fn2(i64 noundef %a, i64 noundef %b, i64 noundef %c) !dbg !14 {
+entry:
+ #dbg_value(i64 %a, !19, !DIExpression(), !23)
+ #dbg_value(i64 %b, !20, !DIExpression(), !23)
+ #dbg_value(i64 %c, !21, !DIExpression(), !23)
+ %add = add nsw i64 %b, %a
+ %add1 = add nsw i64 %b, 10
+ %call = tail call i64 @fn1(i64 noundef %add, i64 noundef %c, i64 noundef %add1)
+ #dbg_value(i64 %call, !22, !DIExpression(), !23)
+ %cmp = icmp sgt i64 %call, 10
+ %add2 = add nuw nsw i64 %call, 10
+ %retval.0 = select i1 %cmp, i64 %add2, i64 %b
+ ret i64 %retval.0, !dbg !29
+}
+
+declare !dbg !30 i64 @fn1(i64 noundef, i64 noundef, i64 noundef)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "m.c", directory: ".")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{!"clang"}
+!14 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 2, type: !15, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!17, !17, !17, !17}
+!17 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
+!18 = !{!19, !20, !21, !22}
+!19 = !DILocalVariable(name: "a", arg: 1, scope: !14, file: !1, line: 2, type: !17)
+!20 = !DILocalVariable(name: "b", arg: 2, scope: !14, file: !1, line: 2, type: !17)
+!21 = !DILocalVariable(name: "c", arg: 3, scope: !14, file: !1, line: 2, type: !17)
+!22 = !DILocalVariable(name: "local", scope: !14, file: !1, line: 3, type: !17)
+!23 = !DILocation(line: 0, scope: !14)
+!29 = !DILocation(line: 7, column: 1, scope: !14)
+!30 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !15, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+
diff --git a/llvm/test/DebugInfo/unrolled-loop-remainder.ll b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
index f2bd855..c6035ff 100644
--- a/llvm/test/DebugInfo/unrolled-loop-remainder.ll
+++ b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=loop-unroll -unroll-runtime -unroll-allow-remainder -unroll-count=4 -unroll-remainder -S %s -o - | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -12,13 +12,14 @@ target triple = "x86_64-unknown-linux-gnu"
define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
;
-; CHECK-LABEL: @func_c(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @b, align 4, !dbg [[DBG17:![0-9]+]], !tbaa [[TBAA20:![0-9]+]]
+; CHECK-LABEL: define i32 @func_c(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG14:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @b, align 4, !dbg [[DBG17:![0-9]+]], !tbaa [[INT_TBAA20:![0-9]+]]
; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[DOTPR]], 0, !dbg [[DBG24:![0-9]+]]
-; CHECK-NEXT: br i1 [[TOBOOL1]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]], !dbg [[DBG24]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[A_PROMOTED:%.*]] = load ptr, ptr @a, align 8, !dbg [[DBG25:![0-9]+]], !tbaa [[TBAA26:![0-9]+]]
+; CHECK-NEXT: br i1 [[TOBOOL1]], label %[[FOR_END:.*]], label %[[FOR_BODY_LR_PH:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[A_PROMOTED:%.*]] = load ptr, ptr @a, align 8, !dbg [[DBG25:![0-9]+]], !tbaa [[ANYPTR_TBAA26:![0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -2, [[DOTPR]], !dbg [[DBG24]]
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], -2, !dbg [[DBG24]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[DOTPR]], [[TMP1]], !dbg [[DBG24]]
@@ -26,77 +27,77 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
; CHECK-NEXT: [[TMP4:%.*]] = add nuw i32 [[TMP3]], 1, !dbg [[DBG24]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP4]], 3, !dbg [[DBG24]]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0, !dbg [[DBG24]]
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], !dbg [[DBG24]]
-; CHECK: for.body.prol.preheader:
-; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]], !dbg [[DBG24]]
-; CHECK: for.body.prol:
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER:.*]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL]]:
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, ptr [[A_PROMOTED]], i64 1, !dbg [[DBG28:![0-9]+]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_PROL]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_PROL]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_PROL:%.*]] = sext i32 [[TMP5]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[CONV_PROL]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_PROL:%.*]] = add nsw i32 [[DOTPR]], 2, !dbg [[DBG29:![0-9]+]]
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]], !dbg [[DBG24]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_1:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !dbg [[DBG24]]
-; CHECK: for.body.prol.1:
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL_1:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL_1]]:
; CHECK-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_PROL_1:%.*]] = sext i32 [[TMP7]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[CONV_PROL_1]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_PROL_1:%.*]] = add nsw i32 [[DOTPR]], 4, !dbg [[DBG29]]
; CHECK-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]], !dbg [[DBG24]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
-; CHECK: for.body.prol.2:
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[FOR_BODY_PROL_2:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL_2]]:
; CHECK-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_PROL_2:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[CONV_PROL_2]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_PROL_2:%.*]] = add nsw i32 [[DOTPR]], 6, !dbg [[DBG29]]
-; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
-; CHECK: for.body.prol.loopexit.unr-lcssa:
-; CHECK-NEXT: [[DOTLCSSA_UNR_PH:%.*]] = phi ptr [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ]
-; CHECK-NEXT: [[DOTUNR_PH:%.*]] = phi ptr [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ]
-; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[ADD_PROL_1]], [[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2]], [[FOR_BODY_PROL_2]] ]
-; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]]
-; CHECK: for.body.prol.loopexit:
-; CHECK-NEXT: [[DOTLCSSA_UNR:%.*]] = phi ptr [ poison, [[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[DOTUNR:%.*]] = phi ptr [ [[A_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTLCSSA_UNR_PH:%.*]] = phi ptr [ [[TMP6]], %[[FOR_BODY_PROL]] ], [ [[TMP8]], %[[FOR_BODY_PROL_1]] ], [ [[TMP10]], %[[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT: [[DOTUNR_PH:%.*]] = phi ptr [ [[TMP6]], %[[FOR_BODY_PROL]] ], [ [[TMP8]], %[[FOR_BODY_PROL_1]] ], [ [[TMP10]], %[[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ], [ [[ADD_PROL_1]], %[[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2]], %[[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[DOTLCSSA_UNR:%.*]] = phi ptr [ poison, %[[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[DOTUNR:%.*]] = phi ptr [ [[A_PROMOTED]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP3]], 3, !dbg [[DBG24]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]], !dbg [[DBG24]]
-; CHECK: for.body.lr.ph.new:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG24]]
-; CHECK: for.body:
-; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[DOTUNR]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ], !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTUNR1]], [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY_LR_PH_NEW:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY_LR_PH_NEW]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG24]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[DOTUNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[TMP21:%.*]], %[[FOR_BODY]] ], !dbg [[DBG28]]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTUNR1]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[CONV]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_1:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[CONV_1]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_2:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[CONV_2]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]]
; CHECK-NEXT: [[CONV_3:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP21]] = inttoptr i64 [[CONV_3]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[TMP13]], 8, !dbg [[DBG29]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[ADD_3]], 0, !dbg [[DBG24]]
-; CHECK-NEXT: br i1 [[TOBOOL_3]], label [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:%.*]], label [[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]]
-; CHECK: for.cond.for.end_crit_edge.unr-lcssa:
-; CHECK-NEXT: [[DOTLCSSA_PH:%.*]] = phi ptr [ [[TMP21]], [[FOR_BODY]] ]
-; CHECK-NEXT: br label [[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[DOTLCSSA_UNR]], [[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]]
+; CHECK-NEXT: br i1 [[TOBOOL_3]], label %[[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:.*]], label %[[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTLCSSA_PH:%.*]] = phi ptr [ [[TMP21]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]]
+; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[DOTLCSSA_UNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], %[[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]]
; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP2]], 2, !dbg [[DBG24]]
-; CHECK-NEXT: store ptr [[DOTLCSSA]], ptr @a, align 8, !dbg [[DBG25]], !tbaa [[TBAA26]]
-; CHECK-NEXT: store i32 [[TMP22]], ptr @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[TBAA20]]
-; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG24]]
-; CHECK: for.end:
+; CHECK-NEXT: store ptr [[DOTLCSSA]], ptr @a, align 8, !dbg [[DBG25]], !tbaa [[ANYPTR_TBAA26]]
+; CHECK-NEXT: store i32 [[TMP22]], ptr @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[INT_TBAA20]]
+; CHECK-NEXT: br label %[[FOR_END]], !dbg [[DBG24]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 undef, !dbg [[DBG34:![0-9]+]]
;
entry:
@@ -134,8 +135,9 @@ for.end:
define void @func_d() local_unnamed_addr #1 !dbg !34 {
;
-; CHECK-LABEL: @func_d(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @func_d(
+; CHECK-SAME: ) local_unnamed_addr !dbg [[DBG35:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret void, !dbg [[DBG38:![0-9]+]]
;
entry:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
index 05d4d2a..48de5d1 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -S -passes="msan<eager-checks;track-origins=2>" -msan-instrumentation-with-call-threshold=0 | FileCheck %s
;
; This test illustrates a bug in MemorySanitizer that will shortly be fixed
@@ -16,7 +16,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 {
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[CHAR_TBAA1:![0-9]+]]
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576), i64 35184372088832) to ptr), align 8
; CHECK-NEXT: br label %[[FOR_COND:.*]]
@@ -36,7 +36,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 {
; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i32 @_Z1b11__Int16x4_tS_(<4 x i16> noundef [[TMP1]], <4 x i16> noundef [[LANE]])
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[CONV]] to ptr
-; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[CHAR_TBAA1]]
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
@@ -47,7 +47,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 {
; CHECK-NEXT: store <4 x i16> [[_MSLD3]], ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64
; CHECK-NEXT: call void @__msan_maybe_store_origin_8(i64 zeroext [[TMP12]], ptr @_Z1cv, i32 zeroext [[TMP11]])
-; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA1]]
+; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[CHAR_TBAA1]]
; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
;
entry:
@@ -76,7 +76,7 @@ attributes #0 = { mustprogress noreturn nounwind sanitize_memory "no-trapping-ma
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.mustprogress"}
;.
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[CHAR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
diff --git a/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll b/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll
index 56cf3f5..84e0f73 100644
--- a/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll
+++ b/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -passes='tysan' -S %s | FileCheck %s
;.
@@ -12,8 +12,9 @@
; CHECK: @__tysan_app_memory_mask = external global i64
;.
define ptr @test_load_offset(ptr %argv) {
-; CHECK-LABEL: @test_load_offset(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define ptr @test_load_offset(
+; CHECK-SAME: ptr [[ARGV:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 4
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 4
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 0, [[APP_MEM_MASK]]
@@ -22,8 +23,8 @@ define ptr @test_load_offset(ptr %argv) {
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: set.type:
+; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: [[SET_TYPE]]:
; CHECK-NEXT: store ptr @__tysan_v1_any_20pointer_o_0, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -46,9 +47,9 @@ define ptr @test_load_offset(ptr %argv) {
; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8
-; CHECK-NEXT: br label [[TMP0]]
-; CHECK: 0:
-; CHECK-NEXT: [[L:%.*]] = load ptr, ptr null, align 8, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: br label %[[BB0]]
+; CHECK: [[BB0]]:
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr null, align 8, !tbaa [[ANYPTR_TBAA1:![0-9]+]]
; CHECK-NEXT: ret ptr [[L]]
;
entry:
@@ -64,7 +65,7 @@ entry:
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000}
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[ANYPTR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Instrumentation/TypeSanitizer/anon.ll b/llvm/test/Instrumentation/TypeSanitizer/anon.ll
index 37de1b7..1f0f1bd 100644
--- a/llvm/test/Instrumentation/TypeSanitizer/anon.ll
+++ b/llvm/test/Instrumentation/TypeSanitizer/anon.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; Test basic type sanitizer instrumentation.
;
; RUN: opt -passes='tysan' -S %s | FileCheck %s
@@ -23,22 +23,23 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: @llvm.used = appending global [6 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24], section "llvm.metadata"
;.
define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
-; CHECK-LABEL: @test_anon_ns(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_anon_ns(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24
-; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: 0:
+; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: [[BB0]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; CHECK: 2:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
@@ -54,11 +55,11 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]]
-; CHECK: 18:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]]
+; CHECK: [[BB18]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK-NEXT: br label %[[BB19]]
+; CHECK: [[BB19]]:
; CHECK-NEXT: store ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -69,13 +70,13 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP21:%.*]]
-; CHECK: 20:
+; CHECK-NEXT: br label %[[BB21:.*]]
+; CHECK: [[BB20]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
-; CHECK-NEXT: br label [[TMP43:%.*]]
-; CHECK: 22:
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: br label %[[BB43:.*]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
@@ -94,26 +95,26 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64
; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0
; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]]
-; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]]
-; CHECK: 41:
+; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]]
+; CHECK: [[BB41]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP42]]
-; CHECK: 42:
-; CHECK-NEXT: br label [[TMP43]]
-; CHECK: 43:
-; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]]
-; CHECK-NEXT: [[APP_PTR_INT1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT: br label %[[BB42]]
+; CHECK: [[BB42]]:
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
+; CHECK-NEXT: [[APP_PTR_INT1:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED2:%.*]] = and i64 [[APP_PTR_INT1]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED3:%.*]] = shl i64 [[APP_PTR_MASKED2]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT4:%.*]] = add i64 [[APP_PTR_SHIFTED3]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR5:%.*]] = inttoptr i64 [[SHADOW_PTR_INT4]] to ptr
; CHECK-NEXT: [[SHADOW_DESC6:%.*]] = load ptr, ptr [[SHADOW_PTR5]], align 8
; CHECK-NEXT: [[BAD_DESC7:%.*]] = icmp ne ptr [[SHADOW_DESC6]], @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24
-; CHECK-NEXT: br i1 [[BAD_DESC7]], label [[TMP44:%.*]], label [[TMP66:%.*]], !prof [[PROF0]]
-; CHECK: 44:
+; CHECK-NEXT: br i1 [[BAD_DESC7]], label %[[BB44:.*]], label %[[BB66:.*]], !prof [[PROF0]]
+; CHECK: [[BB44]]:
; CHECK-NEXT: [[TMP45:%.*]] = icmp eq ptr [[SHADOW_DESC6]], null
-; CHECK-NEXT: br i1 [[TMP45]], label [[TMP46:%.*]], label [[TMP64:%.*]]
-; CHECK: 46:
+; CHECK-NEXT: br i1 [[TMP45]], label %[[BB46:.*]], label %[[BB64:.*]]
+; CHECK: [[BB46]]:
; CHECK-NEXT: [[TMP47:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8
; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr
; CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[TMP48]], align 8
@@ -129,11 +130,11 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP58]], align 8
; CHECK-NEXT: [[TMP60:%.*]] = icmp ne ptr [[TMP59]], null
; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[TMP56]], [[TMP60]]
-; CHECK-NEXT: br i1 [[TMP61]], label [[TMP62:%.*]], label [[TMP63:%.*]], !prof [[PROF0]]
-; CHECK: 62:
+; CHECK-NEXT: br i1 [[TMP61]], label %[[BB62:.*]], label %[[BB63:.*]], !prof [[PROF0]]
+; CHECK: [[BB62]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP63]]
-; CHECK: 63:
+; CHECK-NEXT: br label %[[BB63]]
+; CHECK: [[BB63]]:
; CHECK-NEXT: store ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, ptr [[SHADOW_PTR5]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET8:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR9:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET8]] to ptr
@@ -144,13 +145,13 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET12:%.*]] = add i64 [[SHADOW_PTR_INT4]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR13:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET12]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR13]], align 8
-; CHECK-NEXT: br label [[TMP65:%.*]]
-; CHECK: 64:
+; CHECK-NEXT: br label %[[BB65:.*]]
+; CHECK: [[BB64]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP65]]
-; CHECK: 65:
-; CHECK-NEXT: br label [[TMP87:%.*]]
-; CHECK: 66:
+; CHECK-NEXT: br label %[[BB65]]
+; CHECK: [[BB65]]:
+; CHECK-NEXT: br label %[[BB87:.*]]
+; CHECK: [[BB66]]:
; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8
; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr
; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8
@@ -169,14 +170,14 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type {
; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64
; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0
; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]]
-; CHECK-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]]
-; CHECK: 85:
+; CHECK-NEXT: br i1 [[TMP84]], label %[[BB85:.*]], label %[[BB86:.*]], !prof [[PROF0]]
+; CHECK: [[BB85]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP86]]
-; CHECK: 86:
-; CHECK-NEXT: br label [[TMP87]]
-; CHECK: 87:
-; CHECK-NEXT: store i32 43, ptr [[B]], align 4, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT: br label %[[BB86]]
+; CHECK: [[BB86]]:
+; CHECK-NEXT: br label %[[BB87]]
+; CHECK: [[BB87]]:
+; CHECK-NEXT: store i32 43, ptr [[B]], align 4, !tbaa [[INT_TBAA6:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -187,22 +188,23 @@ entry:
}
define void @test_anon_type(ptr %a) sanitize_type {
-; CHECK-LABEL: @test_anon_type(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_anon_type(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24
-; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0]]
-; CHECK: 0:
+; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0]]
+; CHECK: [[BB0]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; CHECK: 2:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
@@ -218,11 +220,11 @@ define void @test_anon_type(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]]
-; CHECK: 18:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]]
+; CHECK: [[BB18]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK-NEXT: br label %[[BB19]]
+; CHECK: [[BB19]]:
; CHECK-NEXT: store ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -233,13 +235,13 @@ define void @test_anon_type(ptr %a) sanitize_type {
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP21:%.*]]
-; CHECK: 20:
+; CHECK-NEXT: br label %[[BB21:.*]]
+; CHECK: [[BB20]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
-; CHECK-NEXT: br label [[TMP43:%.*]]
-; CHECK: 22:
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: br label %[[BB43:.*]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
@@ -258,14 +260,14 @@ define void @test_anon_type(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64
; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0
; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]]
-; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]]
-; CHECK: 41:
+; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]]
+; CHECK: [[BB41]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2)
-; CHECK-NEXT: br label [[TMP42]]
-; CHECK: 42:
-; CHECK-NEXT: br label [[TMP43]]
-; CHECK: 43:
-; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: br label %[[BB42]]
+; CHECK: [[BB42]]:
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA8:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -284,17 +286,17 @@ entry:
!11 = !{!"", !2, i64 24}
!12 = !{!11, !2, i64 24}
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type }
+; CHECK: attributes #[[ATTR0]] = { sanitize_type }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000}
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]], i64 24}
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]], i64 24}
; CHECK: [[META2]] = !{!"_ZTSN12_GLOBAL__N_11zE", [[META3]], i64 24}
; CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
; CHECK: [[META5]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META3]], i64 24}
+; CHECK: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META3]], i64 24}
; CHECK: [[META7]] = !{!"_ZTS1yIN12_GLOBAL__N_11zEE", [[META3]], i64 24}
-; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META3]], i64 24}
+; CHECK: [[INT_TBAA8]] = !{[[META9:![0-9]+]], [[META3]], i64 24}
; CHECK: [[META9]] = !{!"", [[META3]], i64 24}
;.
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll b/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll
index 8ddc573..c1a452d 100644
--- a/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 6
; Test basic type sanitizer instrumentation.
; RUN: opt -passes='tysan' -S %s | FileCheck %s
@@ -31,19 +31,20 @@ entry:
; CHECK: @__tysan_shadow_memory_address = external global i64
; CHECK: @__tysan_app_memory_mask = external global i64
;.
-; CHECK-LABEL: @test_load_nsan(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test_load_nsan(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: set.type:
+; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: [[SET_TYPE]]:
; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -54,25 +55,26 @@ entry:
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP0]]
-; CHECK: 0:
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: br label %[[BB0]]
+; CHECK: [[BB0]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP1]]
;
;
-; CHECK-LABEL: @test_store_nsan(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_store_nsan(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0]]
-; CHECK: set.type:
+; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0]]
+; CHECK: [[SET_TYPE]]:
; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -83,21 +85,22 @@ entry:
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP0]]
-; CHECK: 0:
-; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: br label %[[BB0]]
+; CHECK: [[BB0]]:
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: @tysan.module_ctor(
+; CHECK-LABEL: define internal void @tysan.module_ctor(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @__tysan_init()
; CHECK-NEXT: ret void
;
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR0]] = { nounwind }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000}
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic.ll b/llvm/test/Instrumentation/TypeSanitizer/basic.ll
index b40b646..ae7ac53 100644
--- a/llvm/test/Instrumentation/TypeSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; Test basic type sanitizer instrumentation.
;
; RUN: opt -passes='tysan' -S %s | FileCheck %s
@@ -21,22 +21,23 @@ declare i32 @declaration_only(i32 %a) sanitize_type
; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata"
;.
define i32 @test_load(ptr %a) sanitize_type {
-; CHECK-LABEL: @test_load(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test_load(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1_int_o_0
-; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: 0:
+; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: [[BB0]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; CHECK: 2:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
@@ -52,11 +53,11 @@ define i32 @test_load(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]]
-; CHECK: 18:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]]
+; CHECK: [[BB18]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
-; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK-NEXT: br label %[[BB19]]
+; CHECK: [[BB19]]:
; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -67,13 +68,13 @@ define i32 @test_load(ptr %a) sanitize_type {
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP21:%.*]]
-; CHECK: 20:
+; CHECK-NEXT: br label %[[BB21:.*]]
+; CHECK: [[BB20]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
-; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
-; CHECK-NEXT: br label [[TMP43:%.*]]
-; CHECK: 22:
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: br label %[[BB43:.*]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
@@ -92,14 +93,14 @@ define i32 @test_load(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64
; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0
; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]]
-; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]]
-; CHECK: 41:
+; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]]
+; CHECK: [[BB41]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
-; CHECK-NEXT: br label [[TMP42]]
-; CHECK: 42:
-; CHECK-NEXT: br label [[TMP43]]
-; CHECK: 43:
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: br label %[[BB42]]
+; CHECK: [[BB42]]:
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
@@ -108,22 +109,23 @@ entry:
}
define void @test_store(ptr %a) sanitize_type {
-; CHECK-LABEL: @test_store(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_store(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
-; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]]
; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]]
; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1___ZTS1v_o_12
-; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0]]
-; CHECK: 0:
+; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0]]
+; CHECK: [[BB0]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; CHECK: 2:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
@@ -139,11 +141,11 @@ define void @test_store(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]]
-; CHECK: 18:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]]
+; CHECK: [[BB18]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
-; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK-NEXT: br label %[[BB19]]
+; CHECK: [[BB19]]:
; CHECK-NEXT: store ptr @__tysan_v1___ZTS1v_o_12, ptr [[SHADOW_PTR]], align 8
; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
@@ -154,13 +156,13 @@ define void @test_store(ptr %a) sanitize_type {
; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
-; CHECK-NEXT: br label [[TMP21:%.*]]
-; CHECK: 20:
+; CHECK-NEXT: br label %[[BB21:.*]]
+; CHECK: [[BB20]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
-; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
-; CHECK-NEXT: br label [[TMP43:%.*]]
-; CHECK: 22:
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: br label %[[BB43:.*]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
@@ -179,14 +181,14 @@ define void @test_store(ptr %a) sanitize_type {
; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64
; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0
; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]]
-; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]]
-; CHECK: 41:
+; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]]
+; CHECK: [[BB41]]:
; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
-; CHECK-NEXT: br label [[TMP42]]
-; CHECK: 42:
-; CHECK-NEXT: br label [[TMP43]]
-; CHECK: 43:
-; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: br label %[[BB42]]
+; CHECK: [[BB42]]:
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -202,15 +204,15 @@ entry:
!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
!6 = !{!5, !2, i64 12}
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type }
+; CHECK: attributes #[[ATTR0]] = { sanitize_type }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000}
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12}
+; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12}
; CHECK: [[META6]] = !{!"_ZTS1v", [[META2]], i64 8, [[META2]], i64 12, [[META7:![0-9]+]], i64 16}
; CHECK: [[META7]] = !{!"_ZTS1x", [[META2]], i64 0, [[META2]], i64 4}
;.
diff --git a/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll b/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll
index c7c153e..d0ae3bcb 100644
--- a/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll
+++ b/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; Test basic type sanitizer instrumentation.
;
; RUN: opt -passes='tysan' -S %s | FileCheck %s
@@ -10,9 +10,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }]
;.
define i32 @test_load(ptr %a) sanitize_type {
-; CHECK-LABEL: @test_load(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]], !nosanitize [[META4:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !nosanitize [[META4:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
@@ -28,10 +29,10 @@ entry:
!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
!6 = !{!5, !2, i64 12}
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type }
+; CHECK: attributes #[[ATTR0]] = { sanitize_type }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/MC/RISCV/zibi-invalid.s b/llvm/test/MC/RISCV/zibi-invalid.s
new file mode 100644
index 0000000..50e5f07
--- /dev/null
+++ b/llvm/test/MC/RISCV/zibi-invalid.s
@@ -0,0 +1,34 @@
+# RUN: not llvm-mc -triple=riscv32 --mattr=+experimental-zibi %s 2>&1 \
+# RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: not llvm-mc -triple=riscv64 --mattr=+experimental-zibi %s 2>&1 \
+# RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+beqi a0, 0x0, 0x400
+# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31]
+# CHECK-ERROR-LABEL: beqi a0, 0x0, 0x400
+beqi a0, 0x21, 0x400
+# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31]
+# CHECK-ERROR-LABEL: beqi a0, 0x21, 0x400
+beqi a2, 0x10, -0x1f000
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: beqi a2, 0x10, -0x1f000
+beqi a2, 0x10, 0x1000
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: beqi a2, 0x10, 0x1000
+beqi a2, 0x10, 0x111
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: beqi a2, 0x10, 0x111
+bnei a0, 0x0, 0x400
+# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31]
+# CHECK-ERROR-LABEL: bnei a0, 0x0, 0x400
+bnei a0, 0x21, 0x400
+# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31]
+# CHECK-ERROR-LABEL: bnei a0, 0x21, 0x400
+bnei a2, 0x10, -0x1f000
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: bnei a2, 0x10, -0x1f000
+bnei a2, 0x10, 0x1000
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: bnei a2, 0x10, 0x1000
+bnei a2, 0x10, 0x111
+# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094]
+# CHECK-ERROR-LABEL: bnei a2, 0x10, 0x111
diff --git a/llvm/test/MC/RISCV/zibi-valid.s b/llvm/test/MC/RISCV/zibi-valid.s
new file mode 100644
index 0000000..b062c4c
--- /dev/null
+++ b/llvm/test/MC/RISCV/zibi-valid.s
@@ -0,0 +1,63 @@
+# RUN: llvm-mc -triple=riscv32 -show-encoding --mattr=+experimental-zibi %s \
+# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-ASM
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-zibi %s \
+# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-ASM
+# RUN: not llvm-mc -triple=riscv32 -show-encoding %s 2>&1 \
+# RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
+# RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+experimental-zibi %s \
+# RUN: | llvm-objdump -d --mattr=+experimental-zibi --no-print-imm-hex - \
+# RUN: | FileCheck %s --check-prefix=CHECK-OBJ
+# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+experimental-zibi %s \
+# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-zibi %s \
+# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+beqi a0, 1, 1024
+# CHECK-OBJ: beqi a0, 1, 0x400
+# CHECK-ASM: beqi a0, 1, 1024
+# CHECK-ENCODING: [0x63,0x20,0x15,0x40]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 40152063 <unknown>
+beqi a5, -1, -1024
+# CHECK-OBJ: beqi a5, -1, 0xfffffc04
+# CHECK-ASM: beqi a5, -1, -1024
+# CHECK-ENCODING: [0xe3,0xa0,0x07,0xc0]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: c007a0e3 <unknown>
+beqi s0, 22, 0xffe
+# CHECK-OBJ: beqi s0, 22, 0x1006
+# CHECK-ASM: beqi s0, 22, 4094
+# CHECK-ENCODING: [0xe3,0x2f,0x64,0x7f]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 7f642fe3 <unknown>
+beqi s1, 11, -4096
+# CHECK-OBJ: beqi s1, 11, 0xfffff00c
+# CHECK-ASM: beqi s1, 11, -4096
+# CHECK-ENCODING: [0x63,0xa0,0xb4,0x80]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 80b4a063 <unknown>
+bnei a0, 1, 1024
+# CHECK-OBJ: bnei a0, 1, 0x410
+# CHECK-ASM: bnei a0, 1, 1024
+# CHECK-ENCODING: [0x63,0x30,0x15,0x40]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 40153063 <unknown>
+bnei a5, -1, -1024
+# CHECK-OBJ: bnei a5, -1, 0xfffffc14
+# CHECK-ASM: bnei a5, -1, -1024
+# CHECK-ENCODING: [0xe3,0xb0,0x07,0xc0]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: c007b0e3 <unknown>
+bnei s0, 22, 0xffe
+# CHECK-OBJ: bnei s0, 22, 0x1016
+# CHECK-ASM: bnei s0, 22, 4094
+# CHECK-ENCODING: [0xe3,0x3f,0x64,0x7f]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 7f643fe3 <unknown>
+bnei s1, 11, -4096
+# CHECK-OBJ: bnei s1, 11, 0xfffff01c
+# CHECK-ASM: bnei s1, 11, -4096
+# CHECK-ENCODING: [0x63,0xb0,0xb4,0x80]
+# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}}
+# CHECK-UNKNOWN: 80b4b063 <unknown>
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml
index 70dc3528..530ed79 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml
@@ -19,8 +19,8 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 44
Parameters:
- - ParameterType: 2 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: CBV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
@@ -36,8 +36,8 @@ Parts:
# CHECK-NEXT: NumStaticSamplers: 0
# CHECK-NEXT: StaticSamplersOffset: 44
# CHECK-NEXT: Parameters:
-# CHECK-NEXT: - ParameterType: 2
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: CBV
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Descriptor:
# CHECK-NEXT: RegisterSpace: 32
# CHECK-NEXT: ShaderRegister: 31
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml
index 33a74db..2e8df2e 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml
@@ -19,8 +19,8 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 48
Parameters:
- - ParameterType: 2 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: CBV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
@@ -37,8 +37,8 @@ Parts:
# CHECK-NEXT: NumStaticSamplers: 0
# CHECK-NEXT: StaticSamplersOffset: 48
# CHECK-NEXT: Parameters:
-# CHECK-NEXT: - ParameterType: 2
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: CBV
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Descriptor:
# CHECK-NEXT: RegisterSpace: 32
# CHECK-NEXT: ShaderRegister: 31
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml
index b04549f..88d941f 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml
@@ -20,12 +20,12 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 64
Parameters:
- - ParameterType: 0 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: DescriptorTable
+ ShaderVisibility: Domain
Table:
NumRanges: 1
Ranges:
- - RangeType: 0
+ - RangeType: SRV
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
@@ -42,13 +42,13 @@ Parts:
# CHECK-NEXT: NumStaticSamplers: 0
# CHECK-NEXT: StaticSamplersOffset: 64
# CHECK-NEXT: Parameters:
-# CHECK-NEXT: - ParameterType: 0
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: DescriptorTable
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Table:
# CHECK-NEXT: NumRanges: 1
# CHECK-NEXT: RangesOffset: 44
# CHECK-NEXT: Ranges:
-# CHECK-NEXT: - RangeType: 0
+# CHECK-NEXT: - RangeType: SRV
# CHECK-NEXT: NumDescriptors: -1
# CHECK-NEXT: BaseShaderRegister: 42
# CHECK-NEXT: RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml
index d8f3990..c09726d 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml
@@ -20,12 +20,12 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 68
Parameters:
- - ParameterType: 0 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: DescriptorTable
+ ShaderVisibility: Domain
Table:
NumRanges: 1
Ranges:
- - RangeType: 0
+ - RangeType: SRV
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
@@ -43,13 +43,13 @@ Parts:
# CHECK-NEXT: NumStaticSamplers: 0
# CHECK-NEXT: StaticSamplersOffset: 68
# CHECK-NEXT: Parameters:
-# CHECK-NEXT: - ParameterType: 0
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: DescriptorTable
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Table:
# CHECK-NEXT: NumRanges: 1
# CHECK-NEXT: RangesOffset: 44
# CHECK-NEXT: Ranges:
-# CHECK-NEXT: - RangeType: 0
+# CHECK-NEXT: - RangeType: SRV
# CHECK-NEXT: NumDescriptors: -1
# CHECK-NEXT: BaseShaderRegister: 42
# CHECK-NEXT: RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml
index e805526..1322a4e 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml
@@ -21,8 +21,8 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 0
Parameters:
- - ParameterType: 2
- ShaderVisibility: 3
+ - ParameterType: SRV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml
index 26d5653..684ada4 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml
@@ -19,30 +19,30 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 140
Parameters:
- - ParameterType: 1 # Constants32Bit
- ShaderVisibility: 2 # Hull
+ - ParameterType: Constants32Bit
+ ShaderVisibility: Hull
Constants:
Num32BitValues: 16
ShaderRegister: 15
RegisterSpace: 14
- - ParameterType: 1 # Constants32Bit
- ShaderVisibility: 4 # Geometry
+ - ParameterType: Constants32Bit
+ ShaderVisibility: Geometry
Constants:
Num32BitValues: 21
ShaderRegister: 22
RegisterSpace: 23
- - ParameterType: 2 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: SRV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
DATA_STATIC_WHILE_SET_AT_EXECUTE: true
- - ParameterType: 0 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: DescriptorTable
+ ShaderVisibility: Domain
Table:
NumRanges: 1
Ranges:
- - RangeType: 0
+ - RangeType: SRV
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
@@ -60,31 +60,31 @@ Parts:
# CHECK-NEXT: NumStaticSamplers: 0
# CHECK-NEXT: StaticSamplersOffset: 140
# CHECK-NEXT: Parameters:
-# CHECK-NEXT: - ParameterType: 1
-# CHECK-NEXT: ShaderVisibility: 2
+# CHECK-NEXT: - ParameterType: Constants32Bit
+# CHECK-NEXT: ShaderVisibility: Hull
# CHECK-NEXT: Constants:
# CHECK-NEXT: Num32BitValues: 16
# CHECK-NEXT: RegisterSpace: 14
# CHECK-NEXT: ShaderRegister: 15
-# CHECK-NEXT: - ParameterType: 1
-# CHECK-NEXT: ShaderVisibility: 4
+# CHECK-NEXT: - ParameterType: Constants32Bit
+# CHECK-NEXT: ShaderVisibility: Geometry
# CHECK-NEXT: Constants:
# CHECK-NEXT: Num32BitValues: 21
# CHECK-NEXT: RegisterSpace: 23
# CHECK-NEXT: ShaderRegister: 22
-# CHECK-NEXT: - ParameterType: 2
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: SRV
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Descriptor:
# CHECK-NEXT: RegisterSpace: 32
# CHECK-NEXT: ShaderRegister: 31
# CHECK-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true
-# CHECK-NEXT: - ParameterType: 0
-# CHECK-NEXT: ShaderVisibility: 3
+# CHECK-NEXT: - ParameterType: DescriptorTable
+# CHECK-NEXT: ShaderVisibility: Domain
# CHECK-NEXT: Table:
# CHECK-NEXT: NumRanges: 1
# CHECK-NEXT: RangesOffset: 116
# CHECK-NEXT: Ranges:
-# CHECK-NEXT: - RangeType: 0
+# CHECK-NEXT: - RangeType: SRV
# CHECK-NEXT: NumDescriptors: -1
# CHECK-NEXT: BaseShaderRegister: 42
# CHECK-NEXT: RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml
index 88d7c63..00bc190 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml
@@ -31,24 +31,24 @@ Parts:
NumRootParameters: 3
NumStaticSamplers: 0
Parameters:
- - ParameterType: 1 # RootConstants
- ShaderVisibility: 0
+ - ParameterType: Constants32Bit
+ ShaderVisibility: All
Constants:
Num32BitValues: 16
ShaderRegister: 15
RegisterSpace: 14
- - ParameterType: 2 # SRV
- ShaderVisibility: 0
+ - ParameterType: SRV
+ ShaderVisibility: All
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
DATA_STATIC_WHILE_SET_AT_EXECUTE: true
- - ParameterType: 0 # Descriptor Table
- ShaderVisibility: 0
+ - ParameterType: DescriptorTable
+ ShaderVisibility: All
Table:
NumRanges: 1
Ranges:
- - RangeType: 0 # CBV
+ - RangeType: CBuffer
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml
index 347d8f3..eb94086 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml
@@ -33,24 +33,24 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 108
Parameters:
- - ParameterType: 1 # RootConstants
- ShaderVisibility: 0
+ - ParameterType: Constants32Bit
+ ShaderVisibility: All
Constants:
Num32BitValues: 16
ShaderRegister: 15
RegisterSpace: 14
- - ParameterType: 2 # SRV
- ShaderVisibility: 0
+ - ParameterType: CBV
+ ShaderVisibility: All
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
DATA_STATIC_WHILE_SET_AT_EXECUTE: true
- - ParameterType: 0 # Descriptor Table
- ShaderVisibility: 0
+ - ParameterType: DescriptorTable
+ ShaderVisibility: All
Table:
NumRanges: 1
Ranges:
- - RangeType: 0 # CBV
+ - RangeType: CBuffer
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml
index 8e03e1a..73e89c2 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml
@@ -33,24 +33,24 @@ Parts:
NumStaticSamplers: 0
StaticSamplersOffset: 116
Parameters:
- - ParameterType: 1 # RootConstants
- ShaderVisibility: 0
+ - ParameterType: Constants32Bit
+ ShaderVisibility: All
Constants:
Num32BitValues: 16
ShaderRegister: 15
RegisterSpace: 14
- - ParameterType: 2 # SRV
- ShaderVisibility: 0
+ - ParameterType: SRV
+ ShaderVisibility: All
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
DATA_STATIC_WHILE_SET_AT_EXECUTE: true
- - ParameterType: 0 # Descriptor Table
- ShaderVisibility: 0
+ - ParameterType: DescriptorTable
+ ShaderVisibility: All
Table:
NumRanges: 1
Ranges:
- - RangeType: 0 # CBV
+ - RangeType: CBuffer
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml
index 5df7da8..a45e3b0 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml
@@ -22,7 +22,7 @@ Parts:
Samplers:
- ShaderRegister: 31
RegisterSpace: 32
- ShaderVisibility: 7
+ ShaderVisibility: Mesh
AllowInputAssemblerInputLayout: true
DenyGeometryShaderRootAccess: true
@@ -36,18 +36,18 @@ Parts:
#CHECK-NEXT: StaticSamplersOffset: 24
#CHECK-NEXT: Parameters: []
#CHECK-NEXT: Samplers:
-#CHECK-NEXT: - Filter: 85
-#CHECK-NEXT: AddressU: 1
-#CHECK-NEXT: AddressV: 1
-#CHECK-NEXT: AddressW: 1
+#CHECK-NEXT: - Filter: Anisotropic
+#CHECK-NEXT: AddressU: Wrap
+#CHECK-NEXT: AddressV: Wrap
+#CHECK-NEXT: AddressW: Wrap
#CHECK-NEXT: MipLODBias: 0
#CHECK-NEXT: MaxAnisotropy: 16
-#CHECK-NEXT: ComparisonFunc: 4
-#CHECK-NEXT: BorderColor: 2
+#CHECK-NEXT: ComparisonFunc: LessEqual
+#CHECK-NEXT: BorderColor: OpaqueWhite
#CHECK-NEXT: MinLOD: 0
#CHECK-NEXT: MaxLOD: 3.40282e+38
#CHECK-NEXT: ShaderRegister: 31
#CHECK-NEXT: RegisterSpace: 32
-#CHECK-NEXT: ShaderVisibility: 7
+#CHECK-NEXT: ShaderVisibility: Mesh
#CHECK-NEXT: AllowInputAssemblerInputLayout: true
#CHECK-NEXT: DenyGeometryShaderRootAccess: true
diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml
index 888a32b..7454731 100644
--- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml
+++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml
@@ -20,19 +20,19 @@ Parts:
StaticSamplersOffset: 24
Parameters: []
Samplers:
- - Filter: 16
- AddressU: 1
- AddressV: 2
- AddressW: 5
+ - Filter: MinLinearMagMipPoint
+ AddressU: Wrap
+ AddressV: Mirror
+ AddressW: MirrorOnce
MipLODBias: 1.23
MaxAnisotropy: 20
- ComparisonFunc: 4
- BorderColor: 0
+ ComparisonFunc: LessEqual
+ BorderColor: TransparentBlack
MinLOD: 4.56
MaxLOD: 8.90
ShaderRegister: 31
RegisterSpace: 32
- ShaderVisibility: 7
+ ShaderVisibility: Mesh
AllowInputAssemblerInputLayout: true
DenyGeometryShaderRootAccess: true
@@ -46,18 +46,18 @@ Parts:
#CHECK-NEXT: StaticSamplersOffset: 24
#CHECK-NEXT: Parameters: []
#CHECK-NEXT: Samplers:
-#CHECK-NEXT: - Filter: 16
-#CHECK-NEXT: AddressU: 1
-#CHECK-NEXT: AddressV: 2
-#CHECK-NEXT: AddressW: 5
+#CHECK-NEXT: - Filter: MinLinearMagMipPoint
+#CHECK-NEXT: AddressU: Wrap
+#CHECK-NEXT: AddressV: Mirror
+#CHECK-NEXT: AddressW: MirrorOnce
#CHECK-NEXT: MipLODBias: 1.23
#CHECK-NEXT: MaxAnisotropy: 20
-#CHECK-NEXT: ComparisonFunc: 4
-#CHECK-NEXT: BorderColor: 0
+#CHECK-NEXT: ComparisonFunc: LessEqual
+#CHECK-NEXT: BorderColor: TransparentBlack
#CHECK-NEXT: MinLOD: 4.56
#CHECK-NEXT: MaxLOD: 8.9
#CHECK-NEXT: ShaderRegister: 31
#CHECK-NEXT: RegisterSpace: 32
-#CHECK-NEXT: ShaderVisibility: 7
+#CHECK-NEXT: ShaderVisibility: Mesh
#CHECK-NEXT: AllowInputAssemblerInputLayout: true
#CHECK-NEXT: DenyGeometryShaderRootAccess: true
diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td
deleted file mode 100644
index 0384743..0000000
--- a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td
+++ /dev/null
@@ -1,46 +0,0 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../../include %s | FileCheck %s
-
-include "llvm/Target/Target.td"
-
-def archInstrInfo : InstrInfo { }
-
-def arch : Target {
- let InstructionSet = archInstrInfo;
-}
-
-let OutOperandList = (outs), Size = 2 in {
-
-def foo : Instruction {
- let InOperandList = (ins i32imm:$factor);
- field bits<16> Inst;
- field bits<16> SoftFail = 0;
- bits<8> factor;
- let factor{0} = 0; // zero initial value
- let Inst{15...8} = factor{7...0};
- }
-
-def bar : Instruction {
- let InOperandList = (ins i32imm:$factor);
- field bits<16> Inst;
- field bits<16> SoftFail = 0;
- bits<8> factor;
- let factor{0} = 1; // non-zero initial value
- let Inst{15...8} = factor{7...0};
- }
-
-def bax : Instruction {
- let InOperandList = (ins i32imm:$factor);
- field bits<16> Inst;
- field bits<16> SoftFail = 0;
- bits<33> factor;
- let factor{32} = 1; // non-zero initial value
- let Inst{15...8} = factor{32...25};
- }
-
-}
-
-// CHECK: tmp = fieldFromInstruction(insn, 9, 7) << 1;
-// CHECK: tmp = 0x1;
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 9, 7), 1, 7);
-// CHECK: tmp = 0x100000000;
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 7), 25, 7);
diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td b/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td
new file mode 100644
index 0000000..f281996
--- /dev/null
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td
@@ -0,0 +1,66 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def R0 : Register<"r0">;
+def RC : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
+
+def MyInstrInfo : InstrInfo;
+
+def MyTarget : Target {
+ let InstructionSet = MyInstrInfo;
+}
+
+// CHECK-LABEL: case 0:
+// CHECK-NEXT: if (!Check(S, DecodeRCRegisterClass(MI, Decoder)))
+// CHECK-NEXT: return MCDisassembler::Fail;
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 2, 4);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0x0;
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 0, 2), 0, 2);
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 6, 2), 2, 2);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0x0;
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 13, 2) << 1;
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0x0;
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 17, 1), 1, 1);
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 19, 1), 3, 1);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0x5;
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0x2;
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 26, 2), 2, 2);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: tmp = 0xa;
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 28, 1), 0, 1);
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 30, 1), 2, 1);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: return S;
+
+def I : Instruction {
+ let OutOperandList = (outs RC:$op0);
+ let InOperandList = (ins i32imm:$op1, i32imm:$op2, i32imm:$op3, i32imm:$op4,
+ i32imm:$op5, i32imm:$op6, i32imm:$op7, i32imm:$op8);
+ let Size = 4;
+ bits<32> Inst;
+ bits<0> op0; // no init, no variable parts
+ bits<4> op1; // no init, 1 variable part
+ bits<4> op2; // no init, 2 variable parts
+ bits<4> op3 = 0b0000; // zero init, no variable parts
+ bits<4> op4 = {0, ?, ?, 0}; // zero init, 1 variable part
+ bits<4> op5 = {?, 0, ?, 0}; // zero init, 2 variable parts
+ bits<4> op6 = 0b0101; // non-zero init, no variable parts
+ bits<4> op7 = {?, ?, 1, 0}; // non-zero init, 1 variable part
+ bits<4> op8 = {1, ?, 1, ?}; // non-zero init, 2 variable parts
+ let Inst{5...2} = op1;
+ let Inst{1...0} = op2{1...0};
+ let Inst{7...6} = op2{3...2};
+ let Inst{11...8} = op3;
+ let Inst{15...12} = op4;
+ let Inst{19...16} = op5;
+ let Inst{23...20} = op6;
+ let Inst{27...24} = op7;
+ let Inst{31...28} = op8;
+}
diff --git a/llvm/test/TableGen/intrinsic-attrs.td b/llvm/test/TableGen/intrinsic-attrs.td
index bcded0cd2..ab808445 100644
--- a/llvm/test/TableGen/intrinsic-attrs.td
+++ b/llvm/test/TableGen/intrinsic-attrs.td
@@ -25,8 +25,8 @@ def int_deref_ptr_ret : Intrinsic<[llvm_ptr_ty], [], [Dereferenceable<RetIndex,
// CHECK-NEXT: });
// CHECK: static constexpr uint16_t IntrinsicsToAttributesMap[] = {
-// CHECK: 0 << 8 | 0, // llvm.deref.ptr.ret
-// CHECK: 1 << 8 | 1, // llvm.random.gen
+// CHECK: 0 << 2 | 0, // llvm.deref.ptr.ret
+// CHECK: 1 << 2 | 1, // llvm.random.gen
// CHECK: }; // IntrinsicsToAttributesMap
// CHECK: static constexpr ArgNoAttrIDPair ArgAttrIdTable[] = {
diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
index f60dd48..a18c3ba 100644
--- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 6
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; PR17906
@@ -14,12 +14,12 @@
@d = global i8 0, align 1
define internal fastcc void @fn(ptr nocapture readonly %p1, ptr nocapture readonly %p2) {
-; CHECK-LABEL: define {{[^@]+}}@fn
-; CHECK-SAME: (i32 [[P1_0_VAL:%.*]], i64 [[P2_0_VAL:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define internal fastcc void @fn(
+; CHECK-SAME: i32 [[P1_0_VAL:%.*]], i64 [[P2_0_VAL:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[P2_0_VAL]] to i32
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[P1_0_VAL]] to i8
-; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -32,14 +32,14 @@ entry:
}
define i32 @main() {
-; CHECK-LABEL: define {{[^@]+}}@main() {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[TBAA5:![0-9]+]]
-; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA5]]
-; CHECK-NEXT: [[C_VAL:%.*]] = load i64, ptr @c, align 8, !tbaa [[TBAA7:![0-9]+]]
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]]
+; CHECK-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA3]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA3]]
+; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
+; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: [[C_VAL:%.*]] = load i64, ptr @c, align 8, !tbaa [[LONG_TBAA7:![0-9]+]]
; CHECK-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
; CHECK-NEXT: ret i32 0
;
@@ -63,3 +63,14 @@ entry:
!8 = !{!9, !9, i64 0}
!9 = !{!"any pointer", !3, i64 0}
+;.
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"any pointer", [[META1]], i64 0}
+; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META1]], i64 0}
+; CHECK: [[LONG_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"long", [[META1]], i64 0}
+;.
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
index bed0389..c27f827 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
@@ -23,12 +23,12 @@
;.
define internal fastcc void @fn(ptr nocapture readonly %p1, ptr nocapture readonly %p2) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: none)
-; CHECK-LABEL: define {{[^@]+}}@fn
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define internal fastcc void @fn(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[CHAR_TBAA4:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -42,24 +42,24 @@ entry:
define i32 @main() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@main
-; TUNIT-SAME: () #[[ATTR1:[0-9]+]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA5:![0-9]+]]
-; TUNIT-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
-; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]]
-; TUNIT-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; TUNIT-LABEL: define noundef i32 @main(
+; TUNIT-SAME: ) #[[ATTR1:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
+; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]]
+; TUNIT-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA5]]
+; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]]
+; TUNIT-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]]
; TUNIT-NEXT: call fastcc void @fn() #[[ATTR2:[0-9]+]]
; TUNIT-NEXT: ret i32 0
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@main
-; CGSCC-SAME: () #[[ATTR1:[0-9]+]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA5:![0-9]+]]
-; CGSCC-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
-; CGSCC-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]]
-; CGSCC-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; CGSCC-LABEL: define noundef i32 @main(
+; CGSCC-SAME: ) #[[ATTR1:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
+; CGSCC-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]]
+; CGSCC-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA5]]
+; CGSCC-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]]
+; CGSCC-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]]
; CGSCC-NEXT: call fastcc void @fn() #[[ATTR2:[0-9]+]]
; CGSCC-NEXT: ret i32 0
;
@@ -92,19 +92,19 @@ entry:
; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree nosync nounwind willreturn }
; CGSCC: attributes #[[ATTR2]] = { nofree nounwind willreturn }
;.
-; TUNIT: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; TUNIT: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; TUNIT: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; TUNIT: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; TUNIT: [[META3]] = !{!"Simple C/C++ TBAA"}
-; TUNIT: [[TBAA4]] = !{[[META2]], [[META2]], i64 0}
-; TUNIT: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; TUNIT: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0}
+; TUNIT: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; TUNIT: [[META6]] = !{!"any pointer", [[META2]], i64 0}
;.
-; CGSCC: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CGSCC: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CGSCC: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; CGSCC: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CGSCC: [[META3]] = !{!"Simple C/C++ TBAA"}
-; CGSCC: [[TBAA4]] = !{[[META2]], [[META2]], i64 0}
-; CGSCC: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CGSCC: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0}
+; CGSCC: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CGSCC: [[META6]] = !{!"any pointer", [[META2]], i64 0}
;.
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 82bed0f..3e07fe4 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
;
@@ -66,10 +66,10 @@
;.
define void @write_arg(ptr %p, i32 %v) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
-; CHECK-LABEL: define {{[^@]+}}@write_arg
-; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-LABEL: define void @write_arg(
+; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -78,11 +78,11 @@ entry:
}
define void @write_random(ptr %p) {
-; CHECK-LABEL: define {{[^@]+}}@write_random
-; CHECK-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @write_random(
+; CHECK-SAME: ptr nofree writeonly captures(none) [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @random()
-; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: ret void
;
entry:
@@ -112,9 +112,9 @@ declare i32 @random(...)
; }
define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1
-; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define void @local_alloca_simplifiable_1(
+; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17:[0-9]+]]
; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
@@ -126,65 +126,65 @@ define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %ag
; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]]
; TUNIT-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
+; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]]
; TUNIT-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[TBAA10:![0-9]+]]
+; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]]
; TUNIT-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[TBAA11:![0-9]+]]
-; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]]
+; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; TUNIT-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT: store i32 4, ptr [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; TUNIT-NEXT: store i32 4, ptr [[I212]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
; TUNIT-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT: store i32 4, ptr [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
+; TUNIT-NEXT: store i32 4, ptr [[I316]], align 4, !tbaa [[INT_TBAA14:![0-9]+]]
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define void @local_alloca_simplifiable_1(
+; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20:[0-9]+]]
; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7:![0-9]+]]
+; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]]
; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10:![0-9]+]]
+; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]]
; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11:![0-9]+]]
+; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]]
; CGSCC-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21:[0-9]+]]
; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]]
; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]]
; CGSCC-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT: store float [[I4]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: store float [[I4]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00
; CGSCC-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
; CGSCC-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I6]], [[I7]]
; CGSCC-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
-; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; CGSCC-NEXT: store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
+; CGSCC-NEXT: store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
; CGSCC-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
; CGSCC-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I9]], 1
; CGSCC-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
+; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
; CGSCC-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]]
-; CGSCC-NEXT: [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[INT_TBAA14:![0-9]+]]
+; CGSCC-NEXT: [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12]]
; CGSCC-NEXT: [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]]
; CGSCC-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
+; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20]]
; CGSCC-NEXT: ret void
;
@@ -256,156 +256,156 @@ declare void @llvm.lifetime.end.p0(ptr nocapture)
;
define void @local_alloca_simplifiable_2() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2
-; TUNIT-SAME: () #[[ATTR3:[0-9]+]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define void @local_alloca_simplifiable_2(
+; TUNIT-SAME: ) #[[ATTR3:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR17]]
-; TUNIT-NEXT: br label [[FOR_COND:%.*]]
-; TUNIT: for.cond:
-; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; TUNIT-NEXT: br label %[[FOR_COND:.*]]
+; TUNIT: [[FOR_COND]]:
+; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; TUNIT: for.cond.cleanup:
-; TUNIT-NEXT: br label [[FOR_END:%.*]]
-; TUNIT: for.body:
+; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; TUNIT: [[FOR_COND_CLEANUP]]:
+; TUNIT-NEXT: br label %[[FOR_END:.*]]
+; TUNIT: [[FOR_BODY]]:
; TUNIT-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]]
-; TUNIT-NEXT: br label [[FOR_INC]]
-; TUNIT: for.inc:
+; TUNIT-NEXT: br label %[[FOR_INC]]
+; TUNIT: [[FOR_INC]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
-; TUNIT: for.end:
-; TUNIT-NEXT: br label [[FOR_COND2:%.*]]
-; TUNIT: for.cond2:
-; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+; TUNIT: [[FOR_END]]:
+; TUNIT-NEXT: br label %[[FOR_COND2:.*]]
+; TUNIT: [[FOR_COND2]]:
+; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; TUNIT: for.cond.cleanup4:
-; TUNIT-NEXT: br label [[FOR_END11:%.*]]
-; TUNIT: for.body5:
+; TUNIT-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; TUNIT: [[FOR_COND_CLEANUP4]]:
+; TUNIT-NEXT: br label %[[FOR_END11:.*]]
+; TUNIT: [[FOR_BODY5]]:
; TUNIT-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; TUNIT-NEXT: [[I18:%.*]] = or i64 [[I17]], 1
; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]]
-; TUNIT-NEXT: br label [[FOR_INC9]]
-; TUNIT: for.inc9:
+; TUNIT-NEXT: br label %[[FOR_INC9]]
+; TUNIT: [[FOR_INC9]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
-; TUNIT: for.end11:
-; TUNIT-NEXT: br label [[FOR_COND13:%.*]]
-; TUNIT: for.cond13:
-; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ]
+; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
+; TUNIT: [[FOR_END11]]:
+; TUNIT-NEXT: br label %[[FOR_COND13:.*]]
+; TUNIT: [[FOR_COND13]]:
+; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ]
; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; TUNIT: for.cond.cleanup15:
-; TUNIT-NEXT: br label [[FOR_END24:%.*]]
-; TUNIT: for.body16:
+; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; TUNIT: [[FOR_COND_CLEANUP15]]:
+; TUNIT-NEXT: br label %[[FOR_END24:.*]]
+; TUNIT: [[FOR_BODY16]]:
; TUNIT-NEXT: [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; TUNIT-NEXT: [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
; TUNIT-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]]
-; TUNIT-NEXT: br label [[FOR_INC22]]
-; TUNIT: for.inc22:
+; TUNIT-NEXT: br label %[[FOR_INC22]]
+; TUNIT: [[FOR_INC22]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
-; TUNIT: for.end24:
+; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
+; TUNIT: [[FOR_END24]]:
; TUNIT-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023
; TUNIT-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT: br label [[FOR_COND28:%.*]]
-; TUNIT: for.cond28:
-; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
+; TUNIT-NEXT: br label %[[FOR_COND28:.*]]
+; TUNIT: [[FOR_COND28]]:
+; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ]
; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]]
-; TUNIT: for.cond.cleanup30:
-; TUNIT-NEXT: br label [[FOR_END38:%.*]]
-; TUNIT: for.body31:
+; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]]
+; TUNIT: [[FOR_COND_CLEANUP30]]:
+; TUNIT-NEXT: br label %[[FOR_END38:.*]]
+; TUNIT: [[FOR_BODY31]]:
; TUNIT-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA19:![0-9]+]]
-; TUNIT-NEXT: br label [[FOR_INC36]]
-; TUNIT: for.inc36:
+; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA19:![0-9]+]]
+; TUNIT-NEXT: br label %[[FOR_INC36]]
+; TUNIT: [[FOR_INC36]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT: br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
-; TUNIT: for.end38:
+; TUNIT-NEXT: br label %[[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
+; TUNIT: [[FOR_END38]]:
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR17]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2
-; CGSCC-SAME: () #[[ATTR3:[0-9]+]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define void @local_alloca_simplifiable_2(
+; CGSCC-SAME: ) #[[ATTR3:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]]
-; CGSCC-NEXT: br label [[FOR_COND:%.*]]
-; CGSCC: for.cond:
-; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CGSCC-NEXT: br label %[[FOR_COND:.*]]
+; CGSCC: [[FOR_COND]]:
+; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CGSCC: for.cond.cleanup:
-; CGSCC-NEXT: br label [[FOR_END:%.*]]
-; CGSCC: for.body:
+; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CGSCC: [[FOR_COND_CLEANUP]]:
+; CGSCC-NEXT: br label %[[FOR_END:.*]]
+; CGSCC: [[FOR_BODY]]:
; CGSCC-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]]
-; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15:![0-9]+]]
-; CGSCC-NEXT: br label [[FOR_INC]]
-; CGSCC: for.inc:
+; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15:![0-9]+]]
+; CGSCC-NEXT: br label %[[FOR_INC]]
+; CGSCC: [[FOR_INC]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
-; CGSCC: for.end:
-; CGSCC-NEXT: br label [[FOR_COND2:%.*]]
-; CGSCC: for.cond2:
-; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; CGSCC: [[FOR_END]]:
+; CGSCC-NEXT: br label %[[FOR_COND2:.*]]
+; CGSCC: [[FOR_COND2]]:
+; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; CGSCC: for.cond.cleanup4:
-; CGSCC-NEXT: br label [[FOR_END11:%.*]]
-; CGSCC: for.body5:
+; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; CGSCC: [[FOR_COND_CLEANUP4]]:
+; CGSCC-NEXT: br label %[[FOR_END11:.*]]
+; CGSCC: [[FOR_BODY5]]:
; CGSCC-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; CGSCC-NEXT: [[I18:%.*]] = or i64 [[I17]], 1
; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]]
-; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; CGSCC-NEXT: br label [[FOR_INC9]]
-; CGSCC: for.inc9:
+; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18:![0-9]+]]
+; CGSCC-NEXT: br label %[[FOR_INC9]]
+; CGSCC: [[FOR_INC9]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
-; CGSCC: for.end11:
-; CGSCC-NEXT: br label [[FOR_COND13:%.*]]
-; CGSCC: for.cond13:
-; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ]
+; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
+; CGSCC: [[FOR_END11]]:
+; CGSCC-NEXT: br label %[[FOR_COND13:.*]]
+; CGSCC: [[FOR_COND13]]:
+; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ]
; CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; CGSCC: for.cond.cleanup15:
-; CGSCC-NEXT: br label [[FOR_END24:%.*]]
-; CGSCC: for.body16:
+; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; CGSCC: [[FOR_COND_CLEANUP15]]:
+; CGSCC-NEXT: br label %[[FOR_END24:.*]]
+; CGSCC: [[FOR_BODY16]]:
; CGSCC-NEXT: [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; CGSCC-NEXT: [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
; CGSCC-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]]
-; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[TBAA20:![0-9]+]]
-; CGSCC-NEXT: br label [[FOR_INC22]]
-; CGSCC: for.inc22:
+; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[LONG_LONG_TBAA20:![0-9]+]]
+; CGSCC-NEXT: br label %[[FOR_INC22]]
+; CGSCC: [[FOR_INC22]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
-; CGSCC: for.end24:
+; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
+; CGSCC: [[FOR_END24]]:
; CGSCC-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023
-; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT: br label [[FOR_COND28:%.*]]
-; CGSCC: for.cond28:
-; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
+; CGSCC-NEXT: br label %[[FOR_COND28:.*]]
+; CGSCC: [[FOR_COND28]]:
+; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ]
; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]]
-; CGSCC: for.cond.cleanup30:
-; CGSCC-NEXT: br label [[FOR_END38:%.*]]
-; CGSCC: for.body31:
+; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]]
+; CGSCC: [[FOR_COND_CLEANUP30]]:
+; CGSCC-NEXT: br label %[[FOR_END38:.*]]
+; CGSCC: [[FOR_BODY31]]:
; CGSCC-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
-; CGSCC-NEXT: br label [[FOR_INC36]]
-; CGSCC: for.inc36:
+; CGSCC-NEXT: store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_INC36]]
+; CGSCC: [[FOR_INC36]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT: br label [[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
-; CGSCC: for.end38:
+; CGSCC-NEXT: br label %[[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
+; CGSCC: [[FOR_END38]]:
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]]
; CGSCC-NEXT: ret void
;
@@ -516,10 +516,10 @@ for.end38: ; preds = %for.cond.cleanup30
;
define i32 @local_alloca_simplifiable_3() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_3
-; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
-; CHECK-NEXT: br label [[SPLIT:%.*]]
-; CHECK: split:
+; CHECK-LABEL: define noundef i32 @local_alloca_simplifiable_3(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: br label %[[SPLIT:.*]]
+; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret i32 2
;
%A = alloca i32, align 4
@@ -537,8 +537,8 @@ split:
;
define i32 @local_alloca_simplifiable_4() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_4
-; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-LABEL: define i32 @local_alloca_simplifiable_4(
+; CHECK-SAME: ) #[[ATTR4]] {
; CHECK-NEXT: ret i32 undef
;
%A = alloca i32, align 4
@@ -554,34 +554,34 @@ define i32 @local_alloca_simplifiable_4() {
; }
define i32 @multi_obj_simplifiable_1(i32 %cnd) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
-; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define noundef i32 @multi_obj_simplifiable_1(
+; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[L:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
-; TUNIT: cond.true:
-; TUNIT-NEXT: br label [[COND_END:%.*]]
-; TUNIT: cond.false:
-; TUNIT-NEXT: br label [[COND_END]]
-; TUNIT: cond.end:
+; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
+; TUNIT: [[COND_TRUE]]:
+; TUNIT-NEXT: br label %[[COND_END:.*]]
+; TUNIT: [[COND_FALSE]]:
+; TUNIT-NEXT: br label %[[COND_END]]
+; TUNIT: [[COND_END]]:
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
; TUNIT-NEXT: ret i32 5
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
-; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define noundef i32 @multi_obj_simplifiable_1(
+; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[L:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
-; CGSCC: cond.true:
-; CGSCC-NEXT: br label [[COND_END:%.*]]
-; CGSCC: cond.false:
-; CGSCC-NEXT: br label [[COND_END]]
-; CGSCC: cond.end:
+; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
+; CGSCC: [[COND_TRUE]]:
+; CGSCC-NEXT: br label %[[COND_END:.*]]
+; CGSCC: [[COND_FALSE]]:
+; CGSCC-NEXT: br label %[[COND_END]]
+; CGSCC: [[COND_END]]:
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
; CGSCC-NEXT: ret i32 5
;
@@ -616,34 +616,34 @@ cond.end: ; preds = %cond.false, %cond.t
;
define i32 @multi_obj_simplifiable_2(i32 %cnd) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
-; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define i32 @multi_obj_simplifiable_2(
+; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[L:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
-; TUNIT: cond.true:
-; TUNIT-NEXT: br label [[COND_END:%.*]]
-; TUNIT: cond.false:
-; TUNIT-NEXT: br label [[COND_END]]
-; TUNIT: cond.end:
+; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
+; TUNIT: [[COND_TRUE]]:
+; TUNIT-NEXT: br label %[[COND_END:.*]]
+; TUNIT: [[COND_FALSE]]:
+; TUNIT-NEXT: br label %[[COND_END]]
+; TUNIT: [[COND_END]]:
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
; TUNIT-NEXT: ret i32 5
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
-; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR5]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @multi_obj_simplifiable_2(
+; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[L:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
-; CGSCC: cond.true:
-; CGSCC-NEXT: br label [[COND_END:%.*]]
-; CGSCC: cond.false:
-; CGSCC-NEXT: br label [[COND_END]]
-; CGSCC: cond.end:
+; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
+; CGSCC: [[COND_TRUE]]:
+; CGSCC-NEXT: br label %[[COND_END:.*]]
+; CGSCC: [[COND_FALSE]]:
+; CGSCC-NEXT: br label %[[COND_END]]
+; CGSCC: [[COND_END]]:
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
; CGSCC-NEXT: ret i32 5
;
@@ -687,58 +687,58 @@ cond.end: ; preds = %cond.false, %cond.t
;
define void @static_global_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_1
-; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define void @static_global_simplifiable_1(
+; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR18]]
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR18]]
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR18]]
; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
+; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
; TUNIT-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10]]
+; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]]
; TUNIT-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
-; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
+; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
+; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT: store i32 4, ptr [[I2]], align 4, !tbaa [[TBAA13]]
+; TUNIT-NEXT: store i32 4, ptr [[I2]], align 4, !tbaa [[INT_TBAA13]]
; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT: store i32 4, ptr [[I3]], align 4, !tbaa [[TBAA14]]
+; TUNIT-NEXT: store i32 4, ptr [[I3]], align 4, !tbaa [[INT_TBAA14]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_1
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
-; CGSCC-NEXT: store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
-; CGSCC-NEXT: store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
+; CGSCC-LABEL: define void @static_global_simplifiable_1(
+; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
+; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT: store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT: store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]]
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR21]]
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR21]]
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR21]]
-; CGSCC-NEXT: [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT: store float [[I]], ptr [[F1]], align 4, !tbaa [[TBAA7]]
-; CGSCC-NEXT: [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT: store float [[I]], ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT: [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT: store float [[MUL]], ptr [[F2]], align 4, !tbaa [[TBAA10]]
-; CGSCC-NEXT: [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; CGSCC-NEXT: [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: store float [[MUL]], ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT: [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT: [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]]
; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT: store float [[ADD]], ptr [[F3]], align 4, !tbaa [[TBAA11]]
-; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]]
-; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
-; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]]
+; CGSCC-NEXT: store float [[ADD]], ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[INT_TBAA13]]
; CGSCC-NEXT: [[MUL1:%.*]] = shl nsw i32 [[I8]], 1
; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT: store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[TBAA13]]
-; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]]
-; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT: store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[INT_TBAA13]]
+; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[INT_TBAA14]]
+; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]]
; CGSCC-NEXT: [[ADD2:%.*]] = add nsw i32 [[I9]], [[I10]]
; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT: store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[TBAA14]]
+; CGSCC-NEXT: store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[INT_TBAA14]]
; CGSCC-NEXT: ret void
;
entry:
@@ -776,13 +776,13 @@ entry:
define i32 @test_range_merge1() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@test_range_merge1
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define noundef i32 @test_range_merge1(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: ret i32 2
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@test_range_merge1
-; CGSCC-SAME: () #[[ATTR6:[0-9]+]] {
+; CGSCC-LABEL: define noundef i32 @test_range_merge1(
+; CGSCC-SAME: ) #[[ATTR6:[0-9]+]] {
; CGSCC-NEXT: ret i32 2
;
store <2 x i32> <i32 1, i32 1>, ptr @Vs1
@@ -795,8 +795,8 @@ define i32 @test_range_merge1() {
define i32 @test_range_merge2() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@test_range_merge2
-; TUNIT-SAME: () #[[ATTR3]] {
+; TUNIT-LABEL: define i32 @test_range_merge2(
+; TUNIT-SAME: ) #[[ATTR3]] {
; TUNIT-NEXT: store <2 x i32> <i32 3, i32 4>, ptr @Vs2, align 8
; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @Vs2, align 4
; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4
@@ -804,8 +804,8 @@ define i32 @test_range_merge2() {
; TUNIT-NEXT: ret i32 [[ADD]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@test_range_merge2
-; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-LABEL: define i32 @test_range_merge2(
+; CGSCC-SAME: ) #[[ATTR5]] {
; CGSCC-NEXT: store <2 x i32> <i32 3, i32 4>, ptr @Vs2, align 8
; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @Vs2, align 4
; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4
@@ -837,147 +837,147 @@ define i32 @test_range_merge2() {
;
define void @static_global_simplifiable_2() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_2
-; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: br label [[FOR_COND:%.*]]
-; TUNIT: for.cond:
-; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; TUNIT-LABEL: define void @static_global_simplifiable_2(
+; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
+; TUNIT-NEXT: br label %[[FOR_COND:.*]]
+; TUNIT: [[FOR_COND]]:
+; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; TUNIT: for.cond.cleanup:
-; TUNIT-NEXT: br label [[FOR_END:%.*]]
-; TUNIT: for.body:
+; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; TUNIT: [[FOR_COND_CLEANUP]]:
+; TUNIT-NEXT: br label %[[FOR_END:.*]]
+; TUNIT: [[FOR_BODY]]:
; TUNIT-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]]
-; TUNIT-NEXT: br label [[FOR_INC]]
-; TUNIT: for.inc:
+; TUNIT-NEXT: br label %[[FOR_INC]]
+; TUNIT: [[FOR_INC]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
-; TUNIT: for.end:
-; TUNIT-NEXT: br label [[FOR_COND2:%.*]]
-; TUNIT: for.cond2:
-; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; TUNIT: [[FOR_END]]:
+; TUNIT-NEXT: br label %[[FOR_COND2:.*]]
+; TUNIT: [[FOR_COND2]]:
+; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; TUNIT: for.cond.cleanup4:
-; TUNIT-NEXT: br label [[FOR_END11:%.*]]
-; TUNIT: for.body5:
+; TUNIT-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; TUNIT: [[FOR_COND_CLEANUP4]]:
+; TUNIT-NEXT: br label %[[FOR_END11:.*]]
+; TUNIT: [[FOR_BODY5]]:
; TUNIT-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; TUNIT-NEXT: [[I16:%.*]] = or i64 [[I15]], 1
; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]]
-; TUNIT-NEXT: br label [[FOR_INC9]]
-; TUNIT: for.inc9:
+; TUNIT-NEXT: br label %[[FOR_INC9]]
+; TUNIT: [[FOR_INC9]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
-; TUNIT: for.end11:
-; TUNIT-NEXT: br label [[FOR_COND13:%.*]]
-; TUNIT: for.cond13:
-; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
+; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
+; TUNIT: [[FOR_END11]]:
+; TUNIT-NEXT: br label %[[FOR_COND13:.*]]
+; TUNIT: [[FOR_COND13]]:
+; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; TUNIT: for.cond.cleanup15:
-; TUNIT-NEXT: br label [[FOR_END23:%.*]]
-; TUNIT: for.body16:
+; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; TUNIT: [[FOR_COND_CLEANUP15]]:
+; TUNIT-NEXT: br label %[[FOR_END23:.*]]
+; TUNIT: [[FOR_BODY16]]:
; TUNIT-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; TUNIT-NEXT: [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
; TUNIT-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]]
-; TUNIT-NEXT: br label [[FOR_INC21]]
-; TUNIT: for.inc21:
+; TUNIT-NEXT: br label %[[FOR_INC21]]
+; TUNIT: [[FOR_INC21]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
-; TUNIT: for.end23:
+; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
+; TUNIT: [[FOR_END23]]:
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT: br label [[FOR_COND25:%.*]]
-; TUNIT: for.cond25:
-; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
+; TUNIT-NEXT: br label %[[FOR_COND25:.*]]
+; TUNIT: [[FOR_COND25]]:
+; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ]
; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]]
-; TUNIT: for.cond.cleanup27:
-; TUNIT-NEXT: br label [[FOR_END35:%.*]]
-; TUNIT: for.body28:
+; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]]
+; TUNIT: [[FOR_COND_CLEANUP27]]:
+; TUNIT-NEXT: br label %[[FOR_END35:.*]]
+; TUNIT: [[FOR_BODY28]]:
; TUNIT-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
-; TUNIT-NEXT: br label [[FOR_INC33]]
-; TUNIT: for.inc33:
+; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]]
+; TUNIT-NEXT: br label %[[FOR_INC33]]
+; TUNIT: [[FOR_INC33]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT: br label [[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
-; TUNIT: for.end35:
+; TUNIT-NEXT: br label %[[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
+; TUNIT: [[FOR_END35]]:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_2
-; CGSCC-SAME: () #[[ATTR3]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: br label [[FOR_COND:%.*]]
-; CGSCC: for.cond:
-; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CGSCC-LABEL: define void @static_global_simplifiable_2(
+; CGSCC-SAME: ) #[[ATTR3]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
+; CGSCC-NEXT: br label %[[FOR_COND:.*]]
+; CGSCC: [[FOR_COND]]:
+; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CGSCC: for.cond.cleanup:
-; CGSCC-NEXT: br label [[FOR_END:%.*]]
-; CGSCC: for.body:
+; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CGSCC: [[FOR_COND_CLEANUP]]:
+; CGSCC-NEXT: br label %[[FOR_END:.*]]
+; CGSCC: [[FOR_BODY]]:
; CGSCC-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]]
-; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15]]
-; CGSCC-NEXT: br label [[FOR_INC]]
-; CGSCC: for.inc:
+; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_INC]]
+; CGSCC: [[FOR_INC]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
-; CGSCC: for.end:
-; CGSCC-NEXT: br label [[FOR_COND2:%.*]]
-; CGSCC: for.cond2:
-; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; CGSCC: [[FOR_END]]:
+; CGSCC-NEXT: br label %[[FOR_COND2:.*]]
+; CGSCC: [[FOR_COND2]]:
+; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; CGSCC: for.cond.cleanup4:
-; CGSCC-NEXT: br label [[FOR_END11:%.*]]
-; CGSCC: for.body5:
+; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; CGSCC: [[FOR_COND_CLEANUP4]]:
+; CGSCC-NEXT: br label %[[FOR_END11:.*]]
+; CGSCC: [[FOR_BODY5]]:
; CGSCC-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; CGSCC-NEXT: [[I16:%.*]] = or i64 [[I15]], 1
; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]]
-; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
-; CGSCC-NEXT: br label [[FOR_INC9]]
-; CGSCC: for.inc9:
+; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]]
+; CGSCC-NEXT: br label %[[FOR_INC9]]
+; CGSCC: [[FOR_INC9]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
-; CGSCC: for.end11:
-; CGSCC-NEXT: br label [[FOR_COND13:%.*]]
-; CGSCC: for.cond13:
-; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
+; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
+; CGSCC: [[FOR_END11]]:
+; CGSCC-NEXT: br label %[[FOR_COND13:.*]]
+; CGSCC: [[FOR_COND13]]:
+; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
; CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; CGSCC: for.cond.cleanup15:
-; CGSCC-NEXT: br label [[FOR_END23:%.*]]
-; CGSCC: for.body16:
+; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; CGSCC: [[FOR_COND_CLEANUP15]]:
+; CGSCC-NEXT: br label %[[FOR_END23:.*]]
+; CGSCC: [[FOR_BODY16]]:
; CGSCC-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; CGSCC-NEXT: [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
; CGSCC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]]
-; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[TBAA20]]
-; CGSCC-NEXT: br label [[FOR_INC21]]
-; CGSCC: for.inc21:
+; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[LONG_LONG_TBAA20]]
+; CGSCC-NEXT: br label %[[FOR_INC21]]
+; CGSCC: [[FOR_INC21]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
-; CGSCC: for.end23:
-; CGSCC-NEXT: store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
+; CGSCC: [[FOR_END23]]:
+; CGSCC-NEXT: store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT: br label [[FOR_COND25:%.*]]
-; CGSCC: for.cond25:
-; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
+; CGSCC-NEXT: br label %[[FOR_COND25:.*]]
+; CGSCC: [[FOR_COND25]]:
+; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ]
; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]]
-; CGSCC: for.cond.cleanup27:
-; CGSCC-NEXT: br label [[FOR_END35:%.*]]
-; CGSCC: for.body28:
+; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]]
+; CGSCC: [[FOR_COND_CLEANUP27]]:
+; CGSCC-NEXT: br label %[[FOR_END35:.*]]
+; CGSCC: [[FOR_BODY28]]:
; CGSCC-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
-; CGSCC-NEXT: br label [[FOR_INC33]]
-; CGSCC: for.inc33:
+; CGSCC-NEXT: store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_INC33]]
+; CGSCC: [[FOR_INC33]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT: br label [[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
-; CGSCC: for.end35:
+; CGSCC-NEXT: br label %[[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
+; CGSCC: [[FOR_END35]]:
; CGSCC-NEXT: ret void
;
entry:
@@ -1080,15 +1080,15 @@ for.end35: ; preds = %for.cond.cleanup27
; }
define i32 @static_global_simplifiable_3() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_3
-; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]]
+; TUNIT-LABEL: define noundef i32 @static_global_simplifiable_3(
+; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: ret i32 1
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_3
-; CGSCC-SAME: () #[[ATTR6]] {
-; CGSCC-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]]
+; CGSCC-LABEL: define noundef i32 @static_global_simplifiable_3(
+; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: ret i32 1
;
store i32 1, ptr @Flag3, align 4, !tbaa !3
@@ -1115,95 +1115,95 @@ define i32 @static_global_simplifiable_3() {
;
define void @noalias_arg_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result, ptr byval(%struct.S) align 8 %s) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1
-; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define void @noalias_arg_simplifiable_1(
+; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
+; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
; TUNIT-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; TUNIT-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]]
+; TUNIT-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[FLOAT_TBAA10]]
; TUNIT-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; TUNIT-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
+; TUNIT-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
; TUNIT-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR18]]
; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR18]]
; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]]
; TUNIT-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
+; TUNIT-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
; TUNIT-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
+; TUNIT-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
; TUNIT-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; TUNIT-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]]
+; TUNIT-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]]
; TUNIT-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
; TUNIT-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
+; TUNIT-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
; TUNIT-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; TUNIT-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
+; TUNIT-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
; TUNIT-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
+; TUNIT-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
; TUNIT-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]]
; TUNIT-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
-; TUNIT-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
-; TUNIT-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
+; TUNIT-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
+; TUNIT-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
+; TUNIT-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
; TUNIT-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; TUNIT-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]]
+; TUNIT-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]]
; TUNIT-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I8]], 1
; TUNIT-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
+; TUNIT-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
; TUNIT-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; TUNIT-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]]
-; TUNIT-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
+; TUNIT-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[INT_TBAA14]]
+; TUNIT-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
; TUNIT-NEXT: [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]]
; TUNIT-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
+; TUNIT-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define void @noalias_arg_simplifiable_1(
+; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]]
+; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
; CGSCC-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21]]
; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]]
; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]]
; CGSCC-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]]
+; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
; CGSCC-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
; CGSCC-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
; CGSCC-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]]
; CGSCC-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
-; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
-; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
; CGSCC-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]]
+; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]]
; CGSCC-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I8]], 1
; CGSCC-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
+; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
; CGSCC-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]]
-; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
+; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[INT_TBAA14]]
+; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
; CGSCC-NEXT: [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]]
; CGSCC-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
+; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
; CGSCC-NEXT: ret void
;
entry:
@@ -1266,157 +1266,157 @@ entry:
;
define void @noalias_arg_simplifiable_2(ptr %Bytes) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2
-; TUNIT-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: br label [[FOR_COND:%.*]]
-; TUNIT: for.cond:
-; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; TUNIT-LABEL: define void @noalias_arg_simplifiable_2(
+; TUNIT-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
+; TUNIT-NEXT: br label %[[FOR_COND:.*]]
+; TUNIT: [[FOR_COND]]:
+; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; TUNIT: for.cond.cleanup:
-; TUNIT-NEXT: br label [[FOR_END:%.*]]
-; TUNIT: for.body:
+; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; TUNIT: [[FOR_COND_CLEANUP]]:
+; TUNIT-NEXT: br label %[[FOR_END:.*]]
+; TUNIT: [[FOR_BODY]]:
; TUNIT-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]]
-; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA19]]
-; TUNIT-NEXT: br label [[FOR_INC]]
-; TUNIT: for.inc:
+; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA19]]
+; TUNIT-NEXT: br label %[[FOR_INC]]
+; TUNIT: [[FOR_INC]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-; TUNIT: for.end:
-; TUNIT-NEXT: br label [[FOR_COND2:%.*]]
-; TUNIT: for.cond2:
-; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; TUNIT: [[FOR_END]]:
+; TUNIT-NEXT: br label %[[FOR_COND2:.*]]
+; TUNIT: [[FOR_COND2]]:
+; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; TUNIT: for.cond.cleanup4:
-; TUNIT-NEXT: br label [[FOR_END11:%.*]]
-; TUNIT: for.body5:
+; TUNIT-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; TUNIT: [[FOR_COND_CLEANUP4]]:
+; TUNIT-NEXT: br label %[[FOR_END11:.*]]
+; TUNIT: [[FOR_BODY5]]:
; TUNIT-NEXT: [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; TUNIT-NEXT: [[I17:%.*]] = or i64 [[I16]], 1
; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]]
-; TUNIT-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA26:![0-9]+]]
-; TUNIT-NEXT: br label [[FOR_INC9]]
-; TUNIT: for.inc9:
+; TUNIT-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA26:![0-9]+]]
+; TUNIT-NEXT: br label %[[FOR_INC9]]
+; TUNIT: [[FOR_INC9]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
-; TUNIT: for.end11:
-; TUNIT-NEXT: br label [[FOR_COND13:%.*]]
-; TUNIT: for.cond13:
-; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
+; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
+; TUNIT: [[FOR_END11]]:
+; TUNIT-NEXT: br label %[[FOR_COND13:.*]]
+; TUNIT: [[FOR_COND13]]:
+; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; TUNIT: for.cond.cleanup15:
-; TUNIT-NEXT: br label [[FOR_END23:%.*]]
-; TUNIT: for.body16:
+; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; TUNIT: [[FOR_COND_CLEANUP15]]:
+; TUNIT-NEXT: br label %[[FOR_END23:.*]]
+; TUNIT: [[FOR_BODY16]]:
; TUNIT-NEXT: [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; TUNIT-NEXT: [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
; TUNIT-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]]
-; TUNIT-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA28:![0-9]+]]
-; TUNIT-NEXT: br label [[FOR_INC21]]
-; TUNIT: for.inc21:
+; TUNIT-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA28:![0-9]+]]
+; TUNIT-NEXT: br label %[[FOR_INC21]]
+; TUNIT: [[FOR_INC21]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
-; TUNIT: for.end23:
+; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
+; TUNIT: [[FOR_END23]]:
; TUNIT-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023
-; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA19]]
+; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA19]]
; TUNIT-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500
; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT: br label [[FOR_COND27:%.*]]
-; TUNIT: for.cond27:
-; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
+; TUNIT-NEXT: br label %[[FOR_COND27:.*]]
+; TUNIT: [[FOR_COND27]]:
+; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ]
; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]]
-; TUNIT: for.cond.cleanup29:
-; TUNIT-NEXT: br label [[FOR_END37:%.*]]
-; TUNIT: for.body30:
+; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]]
+; TUNIT: [[FOR_COND_CLEANUP29]]:
+; TUNIT-NEXT: br label %[[FOR_END37:.*]]
+; TUNIT: [[FOR_BODY30]]:
; TUNIT-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]]
-; TUNIT-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; TUNIT-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]]
; TUNIT-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA19]]
-; TUNIT-NEXT: br label [[FOR_INC35]]
-; TUNIT: for.inc35:
+; TUNIT-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA19]]
+; TUNIT-NEXT: br label %[[FOR_INC35]]
+; TUNIT: [[FOR_INC35]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT: br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
-; TUNIT: for.end37:
+; TUNIT-NEXT: br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
+; TUNIT: [[FOR_END37]]:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2
-; CGSCC-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: br label [[FOR_COND:%.*]]
-; CGSCC: for.cond:
-; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CGSCC-LABEL: define void @noalias_arg_simplifiable_2(
+; CGSCC-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
+; CGSCC-NEXT: br label %[[FOR_COND:.*]]
+; CGSCC: [[FOR_COND]]:
+; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CGSCC: for.cond.cleanup:
-; CGSCC-NEXT: br label [[FOR_END:%.*]]
-; CGSCC: for.body:
+; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CGSCC: [[FOR_COND_CLEANUP]]:
+; CGSCC-NEXT: br label %[[FOR_END:.*]]
+; CGSCC: [[FOR_BODY]]:
; CGSCC-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]]
-; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA15]]
-; CGSCC-NEXT: br label [[FOR_INC]]
-; CGSCC: for.inc:
+; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_INC]]
+; CGSCC: [[FOR_INC]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-; CGSCC: for.end:
-; CGSCC-NEXT: br label [[FOR_COND2:%.*]]
-; CGSCC: for.cond2:
-; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
+; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; CGSCC: [[FOR_END]]:
+; CGSCC-NEXT: br label %[[FOR_COND2:.*]]
+; CGSCC: [[FOR_COND2]]:
+; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
-; CGSCC: for.cond.cleanup4:
-; CGSCC-NEXT: br label [[FOR_END11:%.*]]
-; CGSCC: for.body5:
+; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
+; CGSCC: [[FOR_COND_CLEANUP4]]:
+; CGSCC-NEXT: br label %[[FOR_END11:.*]]
+; CGSCC: [[FOR_BODY5]]:
; CGSCC-NEXT: [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
; CGSCC-NEXT: [[I17:%.*]] = or i64 [[I16]], 1
; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]]
-; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
-; CGSCC-NEXT: br label [[FOR_INC9]]
-; CGSCC: for.inc9:
+; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]]
+; CGSCC-NEXT: br label %[[FOR_INC9]]
+; CGSCC: [[FOR_INC9]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
-; CGSCC: for.end11:
-; CGSCC-NEXT: br label [[FOR_COND13:%.*]]
-; CGSCC: for.cond13:
-; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
+; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
+; CGSCC: [[FOR_END11]]:
+; CGSCC-NEXT: br label %[[FOR_COND13:.*]]
+; CGSCC: [[FOR_COND13]]:
+; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
; CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
-; CGSCC: for.cond.cleanup15:
-; CGSCC-NEXT: br label [[FOR_END23:%.*]]
-; CGSCC: for.body16:
+; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
+; CGSCC: [[FOR_COND_CLEANUP15]]:
+; CGSCC-NEXT: br label %[[FOR_END23:.*]]
+; CGSCC: [[FOR_BODY16]]:
; CGSCC-NEXT: [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
; CGSCC-NEXT: [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
; CGSCC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]]
-; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA20]]
-; CGSCC-NEXT: br label [[FOR_INC21]]
-; CGSCC: for.inc21:
+; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA20]]
+; CGSCC-NEXT: br label %[[FOR_INC21]]
+; CGSCC: [[FOR_INC21]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
-; CGSCC: for.end23:
+; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
+; CGSCC: [[FOR_END23]]:
; CGSCC-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023
-; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500
; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT: br label [[FOR_COND27:%.*]]
-; CGSCC: for.cond27:
-; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
+; CGSCC-NEXT: br label %[[FOR_COND27:.*]]
+; CGSCC: [[FOR_COND27]]:
+; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ]
; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]]
-; CGSCC: for.cond.cleanup29:
-; CGSCC-NEXT: br label [[FOR_END37:%.*]]
-; CGSCC: for.body30:
+; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]]
+; CGSCC: [[FOR_COND_CLEANUP29]]:
+; CGSCC-NEXT: br label %[[FOR_END37:.*]]
+; CGSCC: [[FOR_BODY30]]:
; CGSCC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]]
; CGSCC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA15]]
-; CGSCC-NEXT: br label [[FOR_INC35]]
-; CGSCC: for.inc35:
+; CGSCC-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT: br label %[[FOR_INC35]]
+; CGSCC: [[FOR_INC35]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT: br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
-; CGSCC: for.end37:
+; CGSCC-NEXT: br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
+; CGSCC: [[FOR_END37]]:
; CGSCC-NEXT: ret void
;
entry:
@@ -1524,40 +1524,40 @@ for.end37: ; preds = %for.cond.cleanup29
; }
;
define i32 @local_alloca_not_simplifiable_1() {
-; TUNIT-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define i32 @local_alloca_not_simplifiable_1() {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[X:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[Y:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR17]]
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR17]]
-; TUNIT-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]]
-; TUNIT-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]])
; TUNIT-NEXT: call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]])
-; TUNIT-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0
; TUNIT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1
-; TUNIT-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]]
; TUNIT-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]]
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]])
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]])
; TUNIT-NEXT: ret i32 [[ADD1]]
;
-; CGSCC-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @local_alloca_not_simplifiable_1() {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[X:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[Y:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR20]]
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR20]]
-; CGSCC-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]]
-; CGSCC-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]])
; CGSCC-NEXT: call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]])
-; CGSCC-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0
; CGSCC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1
-; CGSCC-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]]
; CGSCC-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]]
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]])
@@ -1586,20 +1586,20 @@ entry:
define i8 @local_alloca_not_simplifiable_2(i64 %index1, i64 %index2, i1 %cnd) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
-; CHECK-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i8 @local_alloca_not_simplifiable_2(
+; CHECK-SAME: i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CHECK-NEXT: store i8 7, ptr [[BYTES]], align 16
-; CHECK-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
-; CHECK: left:
+; CHECK-NEXT: br i1 [[CND]], label %[[LEFT:.*]], label %[[RIGHT:.*]]
+; CHECK: [[LEFT]]:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX1]]
-; CHECK-NEXT: br label [[JOIN:%.*]]
-; CHECK: right:
+; CHECK-NEXT: br label %[[JOIN:.*]]
+; CHECK: [[RIGHT]]:
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX2]]
-; CHECK-NEXT: br label [[JOIN]]
-; CHECK: join:
-; CHECK-NEXT: [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
+; CHECK-NEXT: br label %[[JOIN]]
+; CHECK: [[JOIN]]:
+; CHECK-NEXT: [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], %[[LEFT]] ], [ [[GEP2]], %[[RIGHT]] ]
; CHECK-NEXT: store i8 9, ptr [[GEP_JOIN]], align 4
; CHECK-NEXT: [[I:%.*]] = load i8, ptr [[BYTES]], align 16
; CHECK-NEXT: ret i8 [[I]]
@@ -1630,9 +1630,9 @@ join: ; preds = %right, %left
; We could simplify these if we separate accessed bins wrt. alignment (here mod 4).
define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_simplifiable
-; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @unknown_access_mixed_simplifiable(
+; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]]
@@ -1666,9 +1666,9 @@ entry:
; The access to bc4b could go anywhere, nothing is simplifiable.
define i32 @unknown_access_mixed_not_simplifiable(i32 %arg1, i32 %arg2, i32 %arg3) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_not_simplifiable
-; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @unknown_access_mixed_not_simplifiable(
+; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]]
@@ -1716,17 +1716,17 @@ declare void @escape(ptr)
;
define i32 @global_not_simplifiable_1(i32 %cnd) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]]
+; TUNIT-LABEL: define i32 @global_not_simplifiable_1(
+; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
+; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: ret i32 [[I]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]]
+; CGSCC-LABEL: define i32 @global_not_simplifiable_1(
+; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
+; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: ret i32 [[I]]
;
entry:
@@ -1744,15 +1744,15 @@ entry:
; }
;
define i32 @static_global_not_simplifiable_1(i32 %cnd) {
-; CHECK-LABEL: define {{[^@]+}}@static_global_not_simplifiable_1
-; CHECK-SAME: (i32 [[CND:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @static_global_not_simplifiable_1(
+; CHECK-SAME: i32 [[CND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @sync()
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret i32 1
;
entry:
@@ -1780,13 +1780,13 @@ declare void @sync()
; return v;
; }
define i32 @static_global_simplifiable_4(i32 %cnd) {
-; CHECK-LABEL: define {{[^@]+}}@static_global_simplifiable_4
-; CHECK-SAME: (i32 [[CND:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i32 1, ptr @Flag2, align 4, !tbaa [[TBAA3]]
+; CHECK-LABEL: define noundef i32 @static_global_simplifiable_4(
+; CHECK-SAME: i32 [[CND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 1, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: call void @sync()
-; CHECK-NEXT: [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 2, ptr @Flag2, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: store i32 2, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: ret i32 [[I]]
;
entry:
@@ -1806,22 +1806,22 @@ entry:
; return v;
; }
define i32 @static_global_not_simplifiable_2(i32 %cnd) {
-; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2
-; TUNIT-SAME: (i32 [[CND:%.*]]) {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_2(
+; TUNIT-SAME: i32 [[CND:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
+; TUNIT-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: call void @sync() #[[ATTR19:[0-9]+]]
-; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]]
-; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: ret i32 [[I]]
;
-; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2
-; CGSCC-SAME: (i32 [[CND:%.*]]) {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_2(
+; CGSCC-SAME: i32 [[CND:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
+; CGSCC-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: call void @sync() #[[ATTR22:[0-9]+]]
-; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]]
-; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: ret i32 [[I]]
;
entry:
@@ -1833,15 +1833,15 @@ entry:
}
define void @static_global_not_simplifiable_2_helper() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper
-; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; TUNIT-LABEL: define void @static_global_not_simplifiable_2_helper(
+; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper
-; CGSCC-SAME: () #[[ATTR6]] {
-; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; CGSCC-LABEL: define void @static_global_not_simplifiable_2_helper(
+; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: ret void
;
store i32 2, ptr @Flag4, align 4, !tbaa !3
@@ -1851,19 +1851,19 @@ define void @static_global_not_simplifiable_2_helper() {
; Similiar to static_global_simplifiable_3 but with a may-store.
define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3
-; TUNIT-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR3]] {
+; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_3(
+; TUNIT-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]]
-; TUNIT-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]]
-; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
; TUNIT-NEXT: ret i32 [[I]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3
-; CGSCC-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR5]] {
+; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_3(
+; CGSCC-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR5]] {
; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]]
-; CGSCC-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]]
-; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
; CGSCC-NEXT: ret i32 [[I]]
;
%sel = select i1 %c, ptr @Flag3, ptr %p
@@ -1887,15 +1887,15 @@ define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) {
; FIXME: We could replace these loads.
define i32 @write_read_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@write_read_global
-; TUNIT-SAME: () #[[ATTR3]] {
+; TUNIT-LABEL: define i32 @write_read_global(
+; TUNIT-SAME: ) #[[ATTR3]] {
; TUNIT-NEXT: store i32 7, ptr @Gint1, align 4
; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gint1, align 4
; TUNIT-NEXT: ret i32 [[L]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@write_read_global
-; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-LABEL: define i32 @write_read_global(
+; CGSCC-SAME: ) #[[ATTR5]] {
; CGSCC-NEXT: store i32 7, ptr @Gint1, align 4
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gint1, align 4
; CGSCC-NEXT: ret i32 [[L]]
@@ -1906,14 +1906,14 @@ define i32 @write_read_global() {
}
define void @write_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@write_global
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define void @write_global(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: store i32 7, ptr @Gint2, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@write_global
-; CGSCC-SAME: () #[[ATTR6]] {
+; CGSCC-LABEL: define void @write_global(
+; CGSCC-SAME: ) #[[ATTR6]] {
; CGSCC-NEXT: store i32 7, ptr @Gint2, align 4
; CGSCC-NEXT: ret void
;
@@ -1922,14 +1922,14 @@ define void @write_global() {
}
define i32 @read_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define {{[^@]+}}@read_global
-; TUNIT-SAME: () #[[ATTR6]] {
+; TUNIT-LABEL: define i32 @read_global(
+; TUNIT-SAME: ) #[[ATTR6]] {
; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gint2, align 4
; TUNIT-NEXT: ret i32 [[L]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define {{[^@]+}}@read_global
-; CGSCC-SAME: () #[[ATTR7]] {
+; CGSCC-LABEL: define i32 @read_global(
+; CGSCC-SAME: ) #[[ATTR7]] {
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gint2, align 4
; CGSCC-NEXT: ret i32 [[L]]
;
@@ -1938,13 +1938,13 @@ define i32 @read_global() {
}
define i32 @write_read_static_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@write_read_static_global
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define noundef i32 @write_read_static_global(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: ret i32 7
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@write_read_static_global
-; CGSCC-SAME: () #[[ATTR6]] {
+; CGSCC-LABEL: define noundef i32 @write_read_static_global(
+; CGSCC-SAME: ) #[[ATTR6]] {
; CGSCC-NEXT: ret i32 7
;
store i32 7, ptr @Gstatic_int1
@@ -1953,14 +1953,14 @@ define i32 @write_read_static_global() {
}
define void @write_static_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@write_static_global
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define void @write_static_global(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: store i32 7, ptr @Gstatic_int2, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@write_static_global
-; CGSCC-SAME: () #[[ATTR6]] {
+; CGSCC-LABEL: define void @write_static_global(
+; CGSCC-SAME: ) #[[ATTR6]] {
; CGSCC-NEXT: store i32 7, ptr @Gstatic_int2, align 4
; CGSCC-NEXT: ret void
;
@@ -1969,14 +1969,14 @@ define void @write_static_global() {
}
define i32 @read_static_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define {{[^@]+}}@read_static_global
-; TUNIT-SAME: () #[[ATTR6]] {
+; TUNIT-LABEL: define noundef i32 @read_static_global(
+; TUNIT-SAME: ) #[[ATTR6]] {
; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4
; TUNIT-NEXT: ret i32 [[L]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define {{[^@]+}}@read_static_global
-; CGSCC-SAME: () #[[ATTR7]] {
+; CGSCC-LABEL: define noundef i32 @read_static_global(
+; CGSCC-SAME: ) #[[ATTR7]] {
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4
; CGSCC-NEXT: ret i32 [[L]]
;
@@ -1985,13 +1985,13 @@ define i32 @read_static_global() {
}
define i32 @write_read_static_undef_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@write_read_static_undef_global
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define noundef i32 @write_read_static_undef_global(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: ret i32 7
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@write_read_static_undef_global
-; CGSCC-SAME: () #[[ATTR6]] {
+; CGSCC-LABEL: define noundef i32 @write_read_static_undef_global(
+; CGSCC-SAME: ) #[[ATTR6]] {
; CGSCC-NEXT: ret i32 7
;
store i32 7, ptr @Gstatic_undef_int1
@@ -2000,13 +2000,13 @@ define i32 @write_read_static_undef_global() {
}
define void @write_static_undef_global() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@write_static_undef_global
-; TUNIT-SAME: () #[[ATTR5]] {
+; TUNIT-LABEL: define void @write_static_undef_global(
+; TUNIT-SAME: ) #[[ATTR5]] {
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@write_static_undef_global
-; CGSCC-SAME: () #[[ATTR6]] {
+; CGSCC-LABEL: define void @write_static_undef_global(
+; CGSCC-SAME: ) #[[ATTR6]] {
; CGSCC-NEXT: store i32 7, ptr @Gstatic_undef_int2, align 4
; CGSCC-NEXT: ret void
;
@@ -2015,8 +2015,8 @@ define void @write_static_undef_global() {
}
define i32 @read_static_undef_global() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@read_static_undef_global
-; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-LABEL: define i32 @read_static_undef_global(
+; CHECK-SAME: ) #[[ATTR4]] {
; CHECK-NEXT: ret i32 7
;
%l = load i32, ptr @Gstatic_undef_int2
@@ -2025,8 +2025,8 @@ define i32 @read_static_undef_global() {
define i32 @single_read_of_static_global() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@single_read_of_static_global
-; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-LABEL: define noundef i32 @single_read_of_static_global(
+; CHECK-SAME: ) #[[ATTR4]] {
; CHECK-NEXT: ret i32 0
;
%l = load i32, ptr @Gstatic_int3
@@ -2035,20 +2035,20 @@ define i32 @single_read_of_static_global() {
define i8 @phi_store() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@phi_store
-; CHECK-SAME: () #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i8 @phi_store(
+; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[A:%.*]] = alloca i16, align 2
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; CHECK-NEXT: store i8 1, ptr [[P]], align 1
; CHECK-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1
; CHECK-NEXT: [[O]] = add nsw i8 [[I]], 1
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 2
-; CHECK-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; CHECK: end:
+; CHECK-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; CHECK: [[END]]:
; CHECK-NEXT: [[S:%.*]] = getelementptr i8, ptr [[A]], i64 1
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: ret i8 [[L]]
@@ -2074,19 +2074,19 @@ end:
define i8 @phi_no_store_1() {
;
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_1
-; TUNIT-SAME: () #[[ATTR3]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: br label [[LOOP:%.*]]
-; TUNIT: loop:
-; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; TUNIT-LABEL: define i8 @phi_no_store_1(
+; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
+; TUNIT-NEXT: br label %[[LOOP:.*]]
+; TUNIT: [[LOOP]]:
+; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; TUNIT-NEXT: store i8 1, ptr [[P]], align 1
; TUNIT-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1
; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1
; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 3
-; TUNIT-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; TUNIT: end:
+; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; TUNIT: [[END]]:
; TUNIT-NEXT: [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2
; TUNIT-NEXT: [[L11:%.*]] = load i8, ptr [[S11]], align 2
; TUNIT-NEXT: [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3
@@ -2095,19 +2095,19 @@ define i8 @phi_no_store_1() {
; TUNIT-NEXT: ret i8 [[ADD]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_1
-; CGSCC-SAME: () #[[ATTR5]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: br label [[LOOP:%.*]]
-; CGSCC: loop:
-; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CGSCC-LABEL: define i8 @phi_no_store_1(
+; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
+; CGSCC-NEXT: br label %[[LOOP:.*]]
+; CGSCC: [[LOOP]]:
+; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; CGSCC-NEXT: store i8 1, ptr [[P]], align 1
; CGSCC-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1
; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1
; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 3
-; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; CGSCC: end:
+; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; CGSCC: [[END]]:
; CGSCC-NEXT: [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2
; CGSCC-NEXT: [[L11:%.*]] = load i8, ptr [[S11]], align 2
; CGSCC-NEXT: [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3
@@ -2138,19 +2138,19 @@ end:
define i8 @phi_no_store_2() {
;
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_2
-; TUNIT-SAME: () #[[ATTR3]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: br label [[LOOP:%.*]]
-; TUNIT: loop:
-; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; TUNIT-LABEL: define i8 @phi_no_store_2(
+; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
+; TUNIT-NEXT: br label %[[LOOP:.*]]
+; TUNIT: [[LOOP]]:
+; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; TUNIT-NEXT: store i8 1, ptr [[P]], align 1
; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2
; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1
; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7
-; TUNIT-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; TUNIT: end:
+; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; TUNIT: [[END]]:
; TUNIT-NEXT: [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2
; TUNIT-NEXT: [[L21:%.*]] = load i8, ptr [[S21]], align 2
; TUNIT-NEXT: [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3
@@ -2159,19 +2159,19 @@ define i8 @phi_no_store_2() {
; TUNIT-NEXT: ret i8 [[ADD]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_2
-; CGSCC-SAME: () #[[ATTR5]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: br label [[LOOP:%.*]]
-; CGSCC: loop:
-; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CGSCC-LABEL: define i8 @phi_no_store_2(
+; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
+; CGSCC-NEXT: br label %[[LOOP:.*]]
+; CGSCC: [[LOOP]]:
+; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; CGSCC-NEXT: store i8 1, ptr [[P]], align 1
; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2
; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1
; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7
-; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; CGSCC: end:
+; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; CGSCC: [[END]]:
; CGSCC-NEXT: [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2
; CGSCC-NEXT: [[L21:%.*]] = load i8, ptr [[S21]], align 2
; CGSCC-NEXT: [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3
@@ -2200,21 +2200,21 @@ end:
define i8 @phi_no_store_3() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_3
-; TUNIT-SAME: () #[[ATTR3]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define i8 @phi_no_store_3(
+; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
; TUNIT-NEXT: [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3
; TUNIT-NEXT: store i8 0, ptr [[S30]], align 1
-; TUNIT-NEXT: br label [[LOOP:%.*]]
-; TUNIT: loop:
-; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; TUNIT-NEXT: br label %[[LOOP:.*]]
+; TUNIT: [[LOOP]]:
+; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; TUNIT-NEXT: store i8 1, ptr [[P]], align 1
; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2
; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1
; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7
-; TUNIT-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; TUNIT: end:
+; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; TUNIT: [[END]]:
; TUNIT-NEXT: [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2
; TUNIT-NEXT: [[L31:%.*]] = load i8, ptr [[S31]], align 2
; TUNIT-NEXT: [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3
@@ -2226,21 +2226,21 @@ define i8 @phi_no_store_3() {
; TUNIT-NEXT: ret i8 [[ADD2]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_3
-; CGSCC-SAME: () #[[ATTR5]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i8 @phi_no_store_3(
+; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
; CGSCC-NEXT: [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3
; CGSCC-NEXT: store i8 0, ptr [[S30]], align 1
-; CGSCC-NEXT: br label [[LOOP:%.*]]
-; CGSCC: loop:
-; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
-; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CGSCC-NEXT: br label %[[LOOP:.*]]
+; CGSCC: [[LOOP]]:
+; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
+; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
; CGSCC-NEXT: store i8 1, ptr [[P]], align 1
; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2
; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1
; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7
-; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; CGSCC: end:
+; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
+; CGSCC: [[END]]:
; CGSCC-NEXT: [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2
; CGSCC-NEXT: [[L31:%.*]] = load i8, ptr [[S31]], align 2
; CGSCC-NEXT: [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3
@@ -2277,15 +2277,15 @@ end:
define i8 @cast_and_load_1() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_1
-; TUNIT-SAME: () #[[ATTR3]] {
+; TUNIT-LABEL: define i8 @cast_and_load_1(
+; TUNIT-SAME: ) #[[ATTR3]] {
; TUNIT-NEXT: store i32 42, ptr @bytes1, align 4
; TUNIT-NEXT: [[L:%.*]] = load i8, ptr @bytes1, align 4
; TUNIT-NEXT: ret i8 [[L]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_1
-; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-LABEL: define i8 @cast_and_load_1(
+; CGSCC-SAME: ) #[[ATTR5]] {
; CGSCC-NEXT: store i32 42, ptr @bytes1, align 4
; CGSCC-NEXT: [[L:%.*]] = load i8, ptr @bytes1, align 4
; CGSCC-NEXT: ret i8 [[L]]
@@ -2297,15 +2297,15 @@ define i8 @cast_and_load_1() {
define i64 @cast_and_load_2() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_2
-; TUNIT-SAME: () #[[ATTR3]] {
+; TUNIT-LABEL: define i64 @cast_and_load_2(
+; TUNIT-SAME: ) #[[ATTR3]] {
; TUNIT-NEXT: store i32 42, ptr @bytes2, align 4
; TUNIT-NEXT: [[L:%.*]] = load i64, ptr @bytes2, align 4
; TUNIT-NEXT: ret i64 [[L]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_2
-; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-LABEL: define i64 @cast_and_load_2(
+; CGSCC-SAME: ) #[[ATTR5]] {
; CGSCC-NEXT: store i32 42, ptr @bytes2, align 4
; CGSCC-NEXT: [[L:%.*]] = load i64, ptr @bytes2, align 4
; CGSCC-NEXT: ret i64 [[L]]
@@ -2318,33 +2318,33 @@ define i64 @cast_and_load_2() {
define void @recursive_load_store(i64 %N, i32 %v) {
;
; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@recursive_load_store
-; TUNIT-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: br label [[FOR_COND:%.*]]
-; TUNIT: for.cond:
-; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; TUNIT-LABEL: define void @recursive_load_store(
+; TUNIT-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
+; TUNIT-NEXT: br label %[[FOR_COND:.*]]
+; TUNIT: [[FOR_COND]]:
+; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ]
; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; TUNIT: for.body:
+; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; TUNIT: [[FOR_BODY]]:
; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT: br label [[FOR_COND]]
-; TUNIT: for.end:
+; TUNIT-NEXT: br label %[[FOR_COND]]
+; TUNIT: [[FOR_END]]:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@recursive_load_store
-; CGSCC-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: br label [[FOR_COND:%.*]]
-; CGSCC: for.cond:
-; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CGSCC-LABEL: define void @recursive_load_store(
+; CGSCC-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
+; CGSCC-NEXT: br label %[[FOR_COND:.*]]
+; CGSCC: [[FOR_COND]]:
+; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ]
; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; CGSCC: for.body:
+; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CGSCC: [[FOR_BODY]]:
; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT: br label [[FOR_COND]]
-; CGSCC: for.end:
+; CGSCC-NEXT: br label %[[FOR_COND]]
+; CGSCC: [[FOR_END]]:
; CGSCC-NEXT: ret void
;
entry:
@@ -2369,9 +2369,9 @@ for.end:
}
define dso_local i32 @round_trip_malloc(i32 %x) {
-; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc
-; CHECK-SAME: (i32 returned [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local i32 @round_trip_malloc(
+; CHECK-SAME: i32 returned [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4
; CHECK-NEXT: ret i32 [[X]]
@@ -2385,8 +2385,8 @@ entry:
}
define dso_local i32 @round_trip_malloc_constant() {
-; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc_constant() {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local noundef i32 @round_trip_malloc_constant() {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret i32 7
;
entry:
@@ -2402,16 +2402,16 @@ declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "
declare void @free(ptr) allockind("free") "alloc-family"="malloc"
define dso_local i32 @conditional_malloc(i32 %x) {
-; CHECK-LABEL: define {{[^@]+}}@conditional_malloc
-; CHECK-SAME: (i32 returned [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local i32 @conditional_malloc(
+; CHECK-SAME: i32 returned [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK: if.then:
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret i32 [[X]]
;
entry:
@@ -2429,9 +2429,9 @@ if.end: ; preds = %if.then, %entry
}
define dso_local i32 @round_trip_calloc(i32 %x) {
-; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc
-; CHECK-SAME: (i32 returned [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local i32 @round_trip_calloc(
+; CHECK-SAME: i32 returned [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4
@@ -2445,8 +2445,8 @@ entry:
}
define dso_local i32 @round_trip_calloc_constant() {
-; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc_constant() {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local noundef i32 @round_trip_calloc_constant() {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
; CHECK-NEXT: ret i32 11
@@ -2461,17 +2461,17 @@ entry:
declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc"
define dso_local i32 @conditional_calloc(i32 %x) {
-; CHECK-LABEL: define {{[^@]+}}@conditional_calloc
-; CHECK-SAME: (i32 [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local i32 @conditional_calloc(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL_H2S]], align 4
; CHECK-NEXT: ret i32 [[TMP0]]
;
@@ -2491,15 +2491,15 @@ if.end: ; preds = %if.then, %entry
}
define dso_local i32 @conditional_calloc_zero(i1 %c) {
-; CHECK-LABEL: define {{[^@]+}}@conditional_calloc_zero
-; CHECK-SAME: (i1 [[C:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local noundef i32 @conditional_calloc_zero(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
-; CHECK-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br i1 [[C]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
@@ -2517,16 +2517,16 @@ if.end: ; preds = %if.then, %entry
}
define dso_local ptr @malloc_like(i32 %s) {
-; TUNIT-LABEL: define {{[^@]+}}@malloc_like
-; TUNIT-SAME: (i32 [[S:%.*]]) {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define dso_local noalias ptr @malloc_like(
+; TUNIT-SAME: i32 [[S:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64
; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR20:[0-9]+]]
; TUNIT-NEXT: ret ptr [[CALL]]
;
-; CGSCC-LABEL: define {{[^@]+}}@malloc_like
-; CGSCC-SAME: (i32 [[S:%.*]]) {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define dso_local noalias ptr @malloc_like(
+; CGSCC-SAME: i32 [[S:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64
; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR23:[0-9]+]]
; CGSCC-NEXT: ret ptr [[CALL]]
@@ -2538,18 +2538,18 @@ entry:
}
define dso_local i32 @round_trip_malloc_like(i32 %x) {
-; TUNIT-LABEL: define {{[^@]+}}@round_trip_malloc_like
-; TUNIT-SAME: (i32 [[X:%.*]]) {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define dso_local i32 @round_trip_malloc_like(
+; TUNIT-SAME: i32 [[X:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR20]]
; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
; TUNIT-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
; TUNIT-NEXT: ret i32 [[TMP0]]
;
-; CGSCC-LABEL: define {{[^@]+}}@round_trip_malloc_like
-; CGSCC-SAME: (i32 [[X:%.*]]) {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define dso_local i32 @round_trip_malloc_like(
+; CGSCC-SAME: i32 [[X:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR23]]
; CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4
; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
@@ -2565,18 +2565,18 @@ entry:
}
define dso_local i32 @round_trip_unknown_alloc(i32 %x) {
-; TUNIT-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
-; TUNIT-SAME: (i32 [[X:%.*]]) {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define dso_local i32 @round_trip_unknown_alloc(
+; TUNIT-SAME: i32 [[X:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]]
; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
; TUNIT-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
; TUNIT-NEXT: ret i32 [[TMP0]]
;
-; CGSCC-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
-; CGSCC-SAME: (i32 [[X:%.*]]) {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define dso_local i32 @round_trip_unknown_alloc(
+; CGSCC-SAME: i32 [[X:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]]
; CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4
; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
@@ -2594,30 +2594,30 @@ entry:
declare noalias ptr @unknown_alloc(i32)
define dso_local i32 @conditional_unknown_alloc(i32 %x) {
-; TUNIT-LABEL: define {{[^@]+}}@conditional_unknown_alloc
-; TUNIT-SAME: (i32 [[X:%.*]]) {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define dso_local i32 @conditional_unknown_alloc(
+; TUNIT-SAME: i32 [[X:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]]
; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; TUNIT: if.then:
+; TUNIT-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; TUNIT: [[IF_THEN]]:
; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4
-; TUNIT-NEXT: br label [[IF_END]]
-; TUNIT: if.end:
+; TUNIT-NEXT: br label %[[IF_END]]
+; TUNIT: [[IF_END]]:
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
; TUNIT-NEXT: call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
; TUNIT-NEXT: ret i32 [[TMP0]]
;
-; CGSCC-LABEL: define {{[^@]+}}@conditional_unknown_alloc
-; CGSCC-SAME: (i32 [[X:%.*]]) {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define dso_local i32 @conditional_unknown_alloc(
+; CGSCC-SAME: i32 [[X:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]]
; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CGSCC: if.then:
+; CGSCC-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CGSCC: [[IF_THEN]]:
; CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4
-; CGSCC-NEXT: br label [[IF_END]]
-; CGSCC: if.end:
+; CGSCC-NEXT: br label %[[IF_END]]
+; CGSCC: [[IF_END]]:
; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
; CGSCC-NEXT: call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR23]]
; CGSCC-NEXT: ret i32 [[TMP0]]
@@ -2643,9 +2643,9 @@ if.end: ; preds = %if.then, %entry
; We mark %dst as writeonly and %src as readonly, that is (for now) all we can expect.
define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
-; TUNIT-LABEL: define {{[^@]+}}@test_nested_memory
-; TUNIT-SAME: (ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define dso_local void @test_nested_memory(
+; TUNIT-SAME: ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
; TUNIT-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
@@ -2662,9 +2662,9 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
; TUNIT-NEXT: call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
; TUNIT-NEXT: ret void
;
-; CGSCC-LABEL: define {{[^@]+}}@test_nested_memory
-; CGSCC-SAME: (ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define dso_local void @test_nested_memory(
+; CGSCC-SAME: ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
; CGSCC-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
; CGSCC-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(24) ptr @malloc(i64 noundef 24)
@@ -2690,9 +2690,9 @@ entry:
define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nofree norecurse nounwind uwtable {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
-; TUNIT-LABEL: define {{[^@]+}}@nested_memory_callee
-; TUNIT-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define internal fastcc void @nested_memory_callee(
+; TUNIT-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
; TUNIT-NEXT: store ptr [[TMP0]], ptr [[S_PRIV]], align 8
; TUNIT-NEXT: [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8
@@ -2700,21 +2700,21 @@ define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nof
; TUNIT-NEXT: [[S_PRIV_B16:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 16
; TUNIT-NEXT: store ptr [[TMP2]], ptr [[S_PRIV_B16]], align 8
; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[S_PRIV]], i64 0, i32 2
-; TUNIT-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8
+; TUNIT-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8, !invariant.load [[META32:![0-9]+]]
; TUNIT-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP3]], i64 0, i32 2
-; TUNIT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8
+; TUNIT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8, !invariant.load [[META32]]
; TUNIT-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP4]], i64 0, i32 1
-; TUNIT-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8
-; TUNIT-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8
+; TUNIT-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8, !invariant.load [[META32]]
+; TUNIT-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8, !invariant.load [[META32]]
; TUNIT-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float
-; TUNIT-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8
+; TUNIT-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8, !invariant.load [[META32]]
; TUNIT-NEXT: store float [[CONV]], ptr [[TMP7]], align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
-; CGSCC-LABEL: define {{[^@]+}}@nested_memory_callee
-; CGSCC-SAME: (ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR12:[0-9]+]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define internal fastcc void @nested_memory_callee(
+; CGSCC-SAME: ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR12:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
; CGSCC-NEXT: store ptr [[TMP0]], ptr [[S_PRIV]], align 8
; CGSCC-NEXT: [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8
@@ -2722,14 +2722,14 @@ define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nof
; CGSCC-NEXT: [[S_PRIV_B16:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 16
; CGSCC-NEXT: store ptr [[TMP2]], ptr [[S_PRIV_B16]], align 8
; CGSCC-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[S_PRIV]], i64 0, i32 2
-; CGSCC-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8
+; CGSCC-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8, !invariant.load [[META32:![0-9]+]]
; CGSCC-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP3]], i64 0, i32 2
-; CGSCC-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8
+; CGSCC-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8, !invariant.load [[META32]]
; CGSCC-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP4]], i64 0, i32 1
-; CGSCC-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8
-; CGSCC-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8
+; CGSCC-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8, !invariant.load [[META32]]
+; CGSCC-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8, !invariant.load [[META32]]
; CGSCC-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float
-; CGSCC-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CGSCC-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8, !invariant.load [[META32]]
; CGSCC-NEXT: store float [[CONV]], ptr [[TMP7]], align 4
; CGSCC-NEXT: ret void
;
@@ -2751,34 +2751,34 @@ entry:
; varying and the accesses thus not "exact". This used to simplify %cmp12 to true.
define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i32 %idx) #0 {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access
-; TUNIT-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define hidden void @no_propagation_of_unknown_index_access(
+; TUNIT-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: [[ENTRY:.*]]:
; TUNIT-NEXT: [[BUF:%.*]] = alloca [128 x i32], align 16
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]]
-; TUNIT-NEXT: br label [[FOR_COND:%.*]]
-; TUNIT: for.cond:
-; TUNIT-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; TUNIT-NEXT: br label %[[FOR_COND:.*]]
+; TUNIT: [[FOR_COND]]:
+; TUNIT-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; TUNIT-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 128
-; TUNIT-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; TUNIT: for.cond.cleanup:
-; TUNIT-NEXT: br label [[FOR_COND4:%.*]]
-; TUNIT: for.body:
+; TUNIT-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; TUNIT: [[FOR_COND_CLEANUP]]:
+; TUNIT-NEXT: br label %[[FOR_COND4:.*]]
+; TUNIT: [[FOR_BODY]]:
; TUNIT-NEXT: [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]]
-; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]]
; TUNIT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]]
; TUNIT-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4
; TUNIT-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[TBAA10]]
-; TUNIT: for.cond4:
-; TUNIT-NEXT: [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ]
+; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]]
+; TUNIT: [[FOR_COND4]]:
+; TUNIT-NEXT: [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ]
; TUNIT-NEXT: [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128
-; TUNIT-NEXT: br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]]
-; TUNIT: for.cond.cleanup6:
+; TUNIT-NEXT: br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]]
+; TUNIT: [[FOR_COND_CLEANUP6]]:
; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]]
; TUNIT-NEXT: ret void
-; TUNIT: for.body7:
+; TUNIT: [[FOR_BODY7]]:
; TUNIT-NEXT: [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64
; TUNIT-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]]
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
@@ -2790,37 +2790,37 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3
; TUNIT-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]]
; TUNIT-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4
; TUNIT-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1
-; TUNIT-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]]
+; TUNIT-NEXT: br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access
-; CGSCC-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define hidden void @no_propagation_of_unknown_index_access(
+; CGSCC-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] {
+; CGSCC-NEXT: [[ENTRY:.*]]:
; CGSCC-NEXT: [[BUF:%.*]] = alloca [128 x i32], align 16
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]]
-; CGSCC-NEXT: br label [[FOR_COND:%.*]]
-; CGSCC: for.cond:
-; CGSCC-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CGSCC-NEXT: br label %[[FOR_COND:.*]]
+; CGSCC: [[FOR_COND]]:
+; CGSCC-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 128
-; CGSCC-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; CGSCC: for.cond.cleanup:
-; CGSCC-NEXT: br label [[FOR_COND4:%.*]]
-; CGSCC: for.body:
+; CGSCC-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; CGSCC: [[FOR_COND_CLEANUP]]:
+; CGSCC-NEXT: br label %[[FOR_COND4:.*]]
+; CGSCC: [[FOR_BODY]]:
; CGSCC-NEXT: [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]]
-; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]]
; CGSCC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]]
; CGSCC-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4
; CGSCC-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[TBAA10]]
-; CGSCC: for.cond4:
-; CGSCC-NEXT: [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ]
+; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]]
+; CGSCC: [[FOR_COND4]]:
+; CGSCC-NEXT: [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ]
; CGSCC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128
-; CGSCC-NEXT: br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]]
-; CGSCC: for.cond.cleanup6:
+; CGSCC-NEXT: br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]]
+; CGSCC: [[FOR_COND_CLEANUP6]]:
; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]]
; CGSCC-NEXT: ret void
-; CGSCC: for.body7:
+; CGSCC: [[FOR_BODY7]]:
; CGSCC-NEXT: [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64
; CGSCC-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]]
; CGSCC-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
@@ -2832,7 +2832,7 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3
; CGSCC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]]
; CGSCC-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4
; CGSCC-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1
-; CGSCC-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]]
+; CGSCC-NEXT: br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]]
;
entry:
%buf = alloca [128 x i32], align 16
@@ -2883,30 +2883,30 @@ for.body7: ; preds = %for.cond4
; Ensure we do not return true.
define internal i1 @alloca_non_unique(ptr %p, i32 %in, i1 %c) {
; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite)
-; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique
-; TUNIT-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR12:[0-9]+]] {
+; TUNIT-LABEL: define internal i1 @alloca_non_unique(
+; TUNIT-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR12:[0-9]+]] {
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: store i32 [[IN]], ptr [[A]], align 4
-; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; TUNIT: t:
+; TUNIT-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; TUNIT: [[T]]:
; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR14:[0-9]+]]
; TUNIT-NEXT: ret i1 [[R]]
-; TUNIT: f:
-; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT: [[F]]:
+; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]]
; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
; TUNIT-NEXT: ret i1 [[CMP]]
;
; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite)
-; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique
-; CGSCC-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] {
+; CGSCC-LABEL: define internal i1 @alloca_non_unique(
+; CGSCC-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] {
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: store i32 [[IN]], ptr [[A]], align 4
-; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; CGSCC: t:
+; CGSCC-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CGSCC: [[T]]:
; CGSCC-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR17:[0-9]+]]
; CGSCC-NEXT: ret i1 [[R]]
-; CGSCC: f:
-; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4
+; CGSCC: [[F]]:
+; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]]
; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
; CGSCC-NEXT: ret i1 [[CMP]]
;
@@ -2925,14 +2925,14 @@ f:
; Ensure we do not return true.
define i1 @alloca_non_unique_caller(i32 %in, i1 %c) {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique_caller
-; TUNIT-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] {
+; TUNIT-LABEL: define i1 @alloca_non_unique_caller(
+; TUNIT-SAME: i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] {
; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR14]]
; TUNIT-NEXT: ret i1 [[R]]
;
; CGSCC: Function Attrs: nofree nosync nounwind memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique_caller
-; CGSCC-SAME: (i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR15:[0-9]+]] {
+; CGSCC-LABEL: define i1 @alloca_non_unique_caller(
+; CGSCC-SAME: i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR15:[0-9]+]] {
; CGSCC-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr nofree undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR25:[0-9]+]]
; CGSCC-NEXT: ret i1 [[R]]
;
@@ -2943,8 +2943,8 @@ define i1 @alloca_non_unique_caller(i32 %in, i1 %c) {
; Ensure we do not return %bad or %l, but %sel
define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal
-; TUNIT-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] {
+; TUNIT-LABEL: define i32 @scope_value_traversal(
+; TUNIT-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] {
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: store i32 [[BAD]], ptr [[A]], align 4
; TUNIT-NEXT: call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR22:[0-9]+]]
@@ -2953,8 +2953,8 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
; TUNIT-NEXT: ret i32 [[SEL]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal
-; CGSCC-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] {
+; CGSCC-LABEL: define i32 @scope_value_traversal(
+; CGSCC-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] {
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: store i32 [[BAD]], ptr [[A]], align 4
; CGSCC-NEXT: call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR26:[0-9]+]]
@@ -2972,16 +2972,16 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
define void @scope_value_traversal_helper(ptr %a, i1 %c) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal_helper
-; TUNIT-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-LABEL: define void @scope_value_traversal_helper(
+; TUNIT-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4
; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42
; TUNIT-NEXT: store i32 [[SEL]], ptr [[A]], align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal_helper
-; CGSCC-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] {
+; CGSCC-LABEL: define void @scope_value_traversal_helper(
+; CGSCC-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] {
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4
; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42
; CGSCC-NEXT: store i32 [[SEL]], ptr [[A]], align 4
@@ -2995,9 +2995,9 @@ define void @scope_value_traversal_helper(ptr %a, i1 %c) {
define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
-; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define noundef i8 @gep_index_from_binary_operator(
+; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
; CHECK-NEXT: ret i8 100
@@ -3014,9 +3014,9 @@ entry:
define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
-; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i8 @gep_index_from_memory(
+; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
; CHECK-NEXT: ret i8 100
@@ -3040,27 +3040,27 @@ entry:
; Ensure this is not flattened to return 3
define i32 @a(i1 %c) {
; TUNIT: Function Attrs: nofree nosync nounwind
-; TUNIT-LABEL: define {{[^@]+}}@a
-; TUNIT-SAME: (i1 noundef [[C:%.*]]) #[[ATTR14]] {
+; TUNIT-LABEL: define noundef i32 @a(
+; TUNIT-SAME: i1 noundef [[C:%.*]]) #[[ATTR14]] {
; TUNIT-NEXT: store i32 3, ptr @G, align 4
-; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; TUNIT: t:
+; TUNIT-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; TUNIT: [[T]]:
; TUNIT-NEXT: [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR14]]
-; TUNIT-NEXT: br label [[F]]
-; TUNIT: f:
+; TUNIT-NEXT: br label %[[F]]
+; TUNIT: [[F]]:
; TUNIT-NEXT: [[R:%.*]] = load i32, ptr @G, align 4
; TUNIT-NEXT: store i32 5, ptr @G, align 4
; TUNIT-NEXT: ret i32 [[R]]
;
; CGSCC: Function Attrs: nofree nosync nounwind
-; CGSCC-LABEL: define {{[^@]+}}@a
-; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR17]] {
+; CGSCC-LABEL: define noundef i32 @a(
+; CGSCC-SAME: i1 noundef [[C:%.*]]) #[[ATTR17]] {
; CGSCC-NEXT: store i32 3, ptr @G, align 4
-; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; CGSCC: t:
+; CGSCC-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CGSCC: [[T]]:
; CGSCC-NEXT: [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR17]]
-; CGSCC-NEXT: br label [[F]]
-; CGSCC: f:
+; CGSCC-NEXT: br label %[[F]]
+; CGSCC: [[F]]:
; CGSCC-NEXT: [[R:%.*]] = load i32, ptr @G, align 4
; CGSCC-NEXT: store i32 5, ptr @G, align 4
; CGSCC-NEXT: ret i32 [[R]]
@@ -3081,22 +3081,22 @@ f:
@GC = internal global i32 undef, align 4
define void @atomicrmw(ptr %p, i32 %i, i1 %cnd) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@atomicrmw
-; TUNIT-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT: br i1 [[CND]], label [[T:%.*]], label [[M:%.*]]
-; TUNIT: t:
-; TUNIT-NEXT: br label [[M]]
-; TUNIT: m:
+; TUNIT-LABEL: define void @atomicrmw(
+; TUNIT-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: br i1 [[CND]], label %[[T:.*]], label %[[M:.*]]
+; TUNIT: [[T]]:
+; TUNIT-NEXT: br label %[[M]]
+; TUNIT: [[M]]:
; TUNIT-NEXT: [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@atomicrmw
-; CGSCC-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR5]] {
-; CGSCC-NEXT: br i1 [[CND]], label [[T:%.*]], label [[M:%.*]]
-; CGSCC: t:
-; CGSCC-NEXT: br label [[M]]
-; CGSCC: m:
+; CGSCC-LABEL: define void @atomicrmw(
+; CGSCC-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT: br i1 [[CND]], label %[[T:.*]], label %[[M:.*]]
+; CGSCC: [[T]]:
+; CGSCC-NEXT: br label %[[M]]
+; CGSCC: [[M]]:
; CGSCC-NEXT: [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4
; CGSCC-NEXT: ret void
;
@@ -3123,24 +3123,24 @@ m:
define i32 @recSimplify(i32 %v, i1 %cond) {
; TUNIT: Function Attrs: nofree nosync nounwind
-; TUNIT-LABEL: define {{[^@]+}}@recSimplify
-; TUNIT-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR14]] {
-; TUNIT-NEXT: br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]]
-; TUNIT: rec:
+; TUNIT-LABEL: define i32 @recSimplify(
+; TUNIT-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR14]] {
+; TUNIT-NEXT: br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]]
+; TUNIT: [[REC]]:
; TUNIT-NEXT: [[RV:%.*]] = call i32 @recSimplify(i32 undef, i1 noundef false) #[[ATTR14]]
; TUNIT-NEXT: ret i32 1
-; TUNIT: comp:
+; TUNIT: [[COMP]]:
; TUNIT-NEXT: store i32 1, ptr @GRS2, align 4
; TUNIT-NEXT: ret i32 1
;
; CGSCC: Function Attrs: nofree nosync nounwind
-; CGSCC-LABEL: define {{[^@]+}}@recSimplify
-; CGSCC-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR17]] {
-; CGSCC-NEXT: br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]]
-; CGSCC: rec:
+; CGSCC-LABEL: define i32 @recSimplify(
+; CGSCC-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR17]] {
+; CGSCC-NEXT: br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]]
+; CGSCC: [[REC]]:
; CGSCC-NEXT: [[RV:%.*]] = call i32 @recSimplify(i32 [[V]], i1 noundef false) #[[ATTR17]]
; CGSCC-NEXT: ret i32 [[RV]]
-; CGSCC: comp:
+; CGSCC: [[COMP]]:
; CGSCC-NEXT: store i32 [[V]], ptr @GRS, align 4
; CGSCC-NEXT: store i32 1, ptr @GRS2, align 4
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @GRS, align 4
@@ -3167,8 +3167,8 @@ comp:
define internal i32 @recSimplify2() {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define {{[^@]+}}@recSimplify2
-; CGSCC-SAME: () #[[ATTR7]] {
+; CGSCC-LABEL: define internal i32 @recSimplify2(
+; CGSCC-SAME: ) #[[ATTR7]] {
; CGSCC-NEXT: [[R:%.*]] = load i32, ptr @GRS, align 4
; CGSCC-NEXT: ret i32 [[R]]
;
@@ -3179,18 +3179,18 @@ define internal i32 @recSimplify2() {
; Verify we do not return 10.
define i32 @may_access_after_return(i32 noundef %N, i32 noundef %M) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return
-; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define noundef i32 @may_access_after_return(
+; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]]
; TUNIT-NEXT: ret i32 8
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return
-; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @may_access_after_return(
+; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]]
@@ -3213,9 +3213,9 @@ entry:
define internal void @write_both(ptr noundef %Q, ptr noundef %R) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
-; CHECK-LABEL: define {{[^@]+}}@write_both
-; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define internal void @write_both(
+; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: store i32 3, ptr [[Q]], align 4
; CHECK-NEXT: store i32 5, ptr [[R]], align 4
; CHECK-NEXT: ret void
@@ -3228,9 +3228,9 @@ entry:
define internal ptr @passthrough(ptr noundef %P) {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@passthrough
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough(
+; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: ret ptr [[P]]
;
entry:
@@ -3240,9 +3240,9 @@ entry:
; Verify we do not return 10.
define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_choice
-; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define noundef i32 @may_access_after_return_choice(
+; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR4]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]]) #[[ATTR23:[0-9]+]]
@@ -3254,9 +3254,9 @@ define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c
; TUNIT-NEXT: ret i32 [[ADD]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_choice
-; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @may_access_after_return_choice(
+; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[B]]) #[[ATTR28:[0-9]+]]
@@ -3281,9 +3281,9 @@ entry:
define internal ptr @passthrough_choice(i1 %c, ptr noundef %P, ptr noundef %Q) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@passthrough_choice
-; CHECK-SAME: (i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_choice(
+; CHECK-SAME: i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], ptr [[P]], ptr [[Q]]
; CHECK-NEXT: ret ptr [[R]]
;
@@ -3295,18 +3295,18 @@ entry:
; Verify we do not return 10.
define i32 @may_access_after_return_no_choice1(i32 noundef %N, i32 noundef %M) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1
-; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice1(
+; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]]
; TUNIT-NEXT: ret i32 8
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1
-; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @may_access_after_return_no_choice1(
+; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]]
@@ -3330,18 +3330,18 @@ entry:
; Verify we do not return 10.
define i32 @may_access_after_return_no_choice2(i32 noundef %N, i32 noundef %M) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2
-; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT: entry:
+; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice2(
+; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
+; TUNIT-NEXT: [[ENTRY:.*:]]
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR18]]
; TUNIT-NEXT: ret i32 8
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2
-; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define i32 @may_access_after_return_no_choice2(
+; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4
; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR21]]
@@ -3364,9 +3364,9 @@ entry:
define internal ptr @passthrough_no_choice_true(i1 %c, ptr noundef %P, ptr noundef %Q) {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_true
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_true(
+; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR4]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[Q_PRIV:%.*]] = alloca i32, align 4
; CGSCC-NEXT: store i32 [[TMP0]], ptr [[Q_PRIV]], align 4
; CGSCC-NEXT: ret ptr [[P]]
@@ -3377,9 +3377,9 @@ entry:
}
define internal ptr @passthrough_no_choice_false(i1 %c, ptr noundef %P, ptr noundef %Q) {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_false
-; CGSCC-SAME: (i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT: entry:
+; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_false(
+; CGSCC-SAME: i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
+; CGSCC-NEXT: [[ENTRY:.*:]]
; CGSCC-NEXT: [[P_PRIV:%.*]] = alloca i32, align 4
; CGSCC-NEXT: store i32 [[TMP0]], ptr [[P_PRIV]], align 4
; CGSCC-NEXT: ret ptr [[Q]]
@@ -3391,8 +3391,8 @@ entry:
define ptr @move2(ptr %p) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@move2
-; CHECK-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define ptr @move2(
+; CHECK-SAME: ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i32 2
; CHECK-NEXT: ret ptr [[G]]
;
@@ -3401,8 +3401,8 @@ define ptr @move2(ptr %p) {
}
define internal ptr @move4(ptr %p) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@move4
-; CHECK-SAME: (ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define internal ptr @move4(
+; CHECK-SAME: ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i32 4
; CHECK-NEXT: ret ptr [[G]]
;
@@ -3412,20 +3412,20 @@ define internal ptr @move4(ptr %p) {
define ptr @move246(i32 %i, ptr %p) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define {{[^@]+}}@move246
-; CHECK-SAME: (i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define ptr @move246(
+; CHECK-SAME: i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[I]], 0
-; CHECK-NEXT: br i1 [[C0]], label [[BG2:%.*]], label [[BG46:%.*]]
-; CHECK: bg2:
+; CHECK-NEXT: br i1 [[C0]], label %[[BG2:.*]], label %[[BG46:.*]]
+; CHECK: [[BG2]]:
; CHECK-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[P]], i32 2
; CHECK-NEXT: ret ptr [[G2]]
-; CHECK: bg46:
+; CHECK: [[BG46]]:
; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[I]], 1
-; CHECK-NEXT: br i1 [[C1]], label [[BG4:%.*]], label [[BG6:%.*]]
-; CHECK: bg4:
+; CHECK-NEXT: br i1 [[C1]], label %[[BG4:.*]], label %[[BG6:.*]]
+; CHECK: [[BG4]]:
; CHECK-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[P]], i32 4
; CHECK-NEXT: ret ptr [[G4]]
-; CHECK: bg6:
+; CHECK: [[BG6]]:
; CHECK-NEXT: [[G6:%.*]] = getelementptr i8, ptr [[P]], i32 6
; CHECK-NEXT: ret ptr [[G6]]
;
@@ -3448,7 +3448,7 @@ bg6:
declare void @use3i8(i8, i8, i8)
define void @returnedPtrAccesses() {
-; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccesses() {
+; TUNIT-LABEL: define void @returnedPtrAccesses() {
; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[A2:%.*]] = call ptr @move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
; TUNIT-NEXT: [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
@@ -3459,7 +3459,7 @@ define void @returnedPtrAccesses() {
; TUNIT-NEXT: call void @use3i8(i8 2, i8 4, i8 6)
; TUNIT-NEXT: ret void
;
-; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccesses() {
+; CGSCC-LABEL: define void @returnedPtrAccesses() {
; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8
; CGSCC-NEXT: [[A2:%.*]] = call nonnull dereferenceable(1) ptr @move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
; CGSCC-NEXT: [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
@@ -3494,16 +3494,16 @@ define void @returnedPtrAccesses() {
}
define void @returnedPtrAccessesMultiple(i32 %i) {
-; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple
-; TUNIT-SAME: (i32 [[I:%.*]]) {
+; TUNIT-LABEL: define void @returnedPtrAccessesMultiple(
+; TUNIT-SAME: i32 [[I:%.*]]) {
; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
; TUNIT-NEXT: store i8 2, ptr [[AP]], align 1
; TUNIT-NEXT: call void @use3i8(i8 2, i8 2, i8 2)
; TUNIT-NEXT: ret void
;
-; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple
-; CGSCC-SAME: (i32 [[I:%.*]]) {
+; CGSCC-LABEL: define void @returnedPtrAccessesMultiple(
+; CGSCC-SAME: i32 [[I:%.*]]) {
; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8
; CGSCC-NEXT: [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
; CGSCC-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
@@ -3530,8 +3530,8 @@ define void @returnedPtrAccessesMultiple(i32 %i) {
}
define void @returnedPtrAccessesMultiple2(i32 %i) {
-; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2
-; TUNIT-SAME: (i32 [[I:%.*]]) {
+; TUNIT-LABEL: define void @returnedPtrAccessesMultiple2(
+; TUNIT-SAME: i32 [[I:%.*]]) {
; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
; TUNIT-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4
@@ -3547,8 +3547,8 @@ define void @returnedPtrAccessesMultiple2(i32 %i) {
; TUNIT-NEXT: call void @use3i8(i8 noundef [[L2]], i8 noundef [[L4]], i8 noundef [[L6]])
; TUNIT-NEXT: ret void
;
-; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2
-; CGSCC-SAME: (i32 [[I:%.*]]) {
+; CGSCC-LABEL: define void @returnedPtrAccessesMultiple2(
+; CGSCC-SAME: i32 [[I:%.*]]) {
; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8
; CGSCC-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
; CGSCC-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4
@@ -3677,57 +3677,58 @@ declare void @llvm.assume(i1 noundef)
; TUNIT: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
; TUNIT: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; TUNIT: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; TUNIT: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
; TUNIT: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
; TUNIT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
; TUNIT: [[META6]] = !{!"Simple C/C++ TBAA"}
-; TUNIT: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
+; TUNIT: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
; TUNIT: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20}
; TUNIT: [[META9]] = !{!"float", [[META5]], i64 0}
-; TUNIT: [[TBAA10]] = !{[[META8]], [[META9]], i64 16}
-; TUNIT: [[TBAA11]] = !{[[META8]], [[META9]], i64 20}
-; TUNIT: [[TBAA12]] = !{[[META8]], [[META4]], i64 0}
-; TUNIT: [[TBAA13]] = !{[[META8]], [[META4]], i64 4}
-; TUNIT: [[TBAA14]] = !{[[META8]], [[META4]], i64 8}
+; TUNIT: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16}
+; TUNIT: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20}
+; TUNIT: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0}
+; TUNIT: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4}
+; TUNIT: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8}
; TUNIT: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]]}
; TUNIT: [[META16]] = !{!"llvm.loop.mustprogress"}
; TUNIT: [[LOOP17]] = distinct !{[[LOOP17]], [[META16]]}
; TUNIT: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]]}
-; TUNIT: [[TBAA19]] = !{[[META5]], [[META5]], i64 0}
+; TUNIT: [[CHAR_TBAA19]] = !{[[META5]], [[META5]], i64 0}
; TUNIT: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]]}
; TUNIT: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
; TUNIT: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]]}
; TUNIT: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]}
; TUNIT: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]}
; TUNIT: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]]}
-; TUNIT: [[TBAA26]] = !{[[META9]], [[META9]], i64 0}
+; TUNIT: [[FLOAT_TBAA26]] = !{[[META9]], [[META9]], i64 0}
; TUNIT: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]}
-; TUNIT: [[TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0}
+; TUNIT: [[LONG_LONG_TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0}
; TUNIT: [[META29]] = !{!"long long", [[META5]], i64 0}
; TUNIT: [[LOOP30]] = distinct !{[[LOOP30]], [[META16]]}
; TUNIT: [[LOOP31]] = distinct !{[[LOOP31]], [[META16]]}
+; TUNIT: [[META32]] = !{}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
; CGSCC: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CGSCC: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CGSCC: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
; CGSCC: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
; CGSCC: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
; CGSCC: [[META6]] = !{!"Simple C/C++ TBAA"}
-; CGSCC: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
+; CGSCC: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
; CGSCC: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20}
; CGSCC: [[META9]] = !{!"float", [[META5]], i64 0}
-; CGSCC: [[TBAA10]] = !{[[META8]], [[META9]], i64 16}
-; CGSCC: [[TBAA11]] = !{[[META8]], [[META9]], i64 20}
-; CGSCC: [[TBAA12]] = !{[[META8]], [[META4]], i64 0}
-; CGSCC: [[TBAA13]] = !{[[META8]], [[META4]], i64 4}
-; CGSCC: [[TBAA14]] = !{[[META8]], [[META4]], i64 8}
-; CGSCC: [[TBAA15]] = !{[[META5]], [[META5]], i64 0}
+; CGSCC: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16}
+; CGSCC: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20}
+; CGSCC: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0}
+; CGSCC: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4}
+; CGSCC: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8}
+; CGSCC: [[CHAR_TBAA15]] = !{[[META5]], [[META5]], i64 0}
; CGSCC: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]]}
; CGSCC: [[META17]] = !{!"llvm.loop.mustprogress"}
-; CGSCC: [[TBAA18]] = !{[[META9]], [[META9]], i64 0}
+; CGSCC: [[FLOAT_TBAA18]] = !{[[META9]], [[META9]], i64 0}
; CGSCC: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]]}
-; CGSCC: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; CGSCC: [[LONG_LONG_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; CGSCC: [[META21]] = !{!"long long", [[META5]], i64 0}
; CGSCC: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]]}
; CGSCC: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]]}
@@ -3739,4 +3740,5 @@ declare void @llvm.assume(i1 noundef)
; CGSCC: [[LOOP29]] = distinct !{[[LOOP29]], [[META17]]}
; CGSCC: [[LOOP30]] = distinct !{[[LOOP30]], [[META17]]}
; CGSCC: [[LOOP31]] = distinct !{[[LOOP31]], [[META17]]}
+; CGSCC: [[META32]] = !{}
;.
diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
index 9dba73a..7348df3 100644
--- a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
+++ b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -o - -passes=gvn %s | FileCheck %s --check-prefixes=CHECK,MDEP
; RUN: opt -S -o - -passes='gvn<memoryssa>' %s | FileCheck %s --check-prefixes=CHECK,MSSA
@@ -13,32 +13,33 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Check that GVN doesn't determine %2 is partially redundant.
define i32 @volatile_load(i32 %n) {
-; CHECK-LABEL: @volatile_load(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a2, align 8, !tbaa [[TBAA5:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[S_09:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[P_08:%.*]] = phi ptr [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[P_08]], align 4, !tbaa [[TBAA9:![0-9]+]]
+; CHECK-LABEL: define i32 @volatile_load(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a2, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[S_09:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[P_08:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[P_08]], align 4, !tbaa [[INT_TBAA9:![0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA9]]
-; CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[P_08]], align 4, !tbaa [[TBAA9]]
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA9]]
+; CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[P_08]], align 4, !tbaa [[INT_TBAA9]]
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP3]], [[S_09]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[P_08]], i64 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]]
+; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: ret i32 [[S_0_LCSSA]]
;
entry:
@@ -78,22 +79,23 @@ for.end:
; But we should not widen %0 to 64-bit load.
define i32 @overaligned_load(i32 %a, ptr nocapture %b) !dbg !13 {
-; CHECK-LABEL: @overaligned_load(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0, !dbg [[DBG14:![0-9]+]]
-; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG14]]
-; CHECK: if.then:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @s1, align 8, !dbg [[DBG15:![0-9]+]], !tbaa [[TBAA9]]
-; CHECK-NEXT: br label [[IF_END:%.*]], !dbg [[DBG15]]
-; CHECK: if.else:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 2, !dbg [[DBG16:![0-9]+]]
-; CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4, !dbg [[DBG16]], !tbaa [[TBAA9]]
-; CHECK-NEXT: br label [[IF_END]], !dbg [[DBG16]]
-; CHECK: if.end:
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ 0, [[IF_ELSE]] ]
-; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ @s1, [[IF_THEN]] ], [ [[B]], [[IF_ELSE]] ]
+; CHECK-LABEL: define i32 @overaligned_load(
+; CHECK-SAME: i32 [[A:%.*]], ptr captures(none) [[B:%.*]]) !dbg [[DBG11:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 0, !dbg [[DBG14:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]], !dbg [[DBG14]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @s1, align 8, !dbg [[DBG15:![0-9]+]], !tbaa [[INT_TBAA9]]
+; CHECK-NEXT: br label %[[IF_END:.*]], !dbg [[DBG15]]
+; CHECK: [[IF_ELSE]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 2, !dbg [[DBG16:![0-9]+]]
+; CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4, !dbg [[DBG16]], !tbaa [[INT_TBAA9]]
+; CHECK-NEXT: br label %[[IF_END]], !dbg [[DBG16]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[TMP0]], %[[IF_THEN]] ], [ 0, %[[IF_ELSE]] ]
+; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ @s1, %[[IF_THEN]] ], [ [[B]], %[[IF_ELSE]] ]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 1, !dbg [[DBG17:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !dbg [[DBG17]], !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !dbg [[DBG17]], !tbaa [[INT_TBAA9]]
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], [[I_0]], !dbg [[DBG17]]
; CHECK-NEXT: ret i32 [[ADD1]], !dbg [[DBG17]]
;
@@ -144,6 +146,23 @@ if.end:
file: !12,
isOptimized: true, flags: "-O2",
splitDebugFilename: "abc.debug", emissionKind: 2)
+;.
+; CHECK: [[META3:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META4:![0-9]+]], producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 0, splitDebugFilename: "abc.debug", emissionKind: LineTablesOnly)
+; CHECK: [[META4]] = !DIFile(filename: "{{.*}}test.cpp", directory: {{.*}})
+; CHECK: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"any pointer", [[META7:![0-9]+]], i64 0}
+; CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+; CHECK: [[META8]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; CHECK: [[META10]] = !{!"int", [[META7]], i64 0}
+; CHECK: [[DBG11]] = distinct !DISubprogram(name: "test", scope: [[META4]], file: [[META4]], line: 99, type: [[META12:![0-9]+]], scopeLine: 100, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META3]], retainedNodes: [[META13:![0-9]+]])
+; CHECK: [[META12]] = !DISubroutineType(types: [[META13]])
+; CHECK: [[META13]] = !{}
+; CHECK: [[DBG14]] = !DILocation(line: 100, column: 1, scope: [[DBG11]])
+; CHECK: [[DBG15]] = !DILocation(line: 101, column: 1, scope: [[DBG11]])
+; CHECK: [[DBG16]] = !DILocation(line: 102, column: 1, scope: [[DBG11]])
+; CHECK: [[DBG17]] = !DILocation(line: 103, column: 1, scope: [[DBG11]])
+;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; MDEP: {{.*}}
; MSSA: {{.*}}
diff --git a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll
index abbb17f..49ee089 100644
--- a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll
+++ b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,MDEP
; RUN: opt -passes='gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK,MSSA
@@ -12,12 +12,12 @@ define void @test(ptr %P, ptr %Q, i1 %arg) nounwind {
; MDEP-NEXT: [[ENTRY:.*:]]
; MDEP-NEXT: br i1 [[ARG]], label %[[BB_NPH:.*]], label %[[FOR_END:.*]]
; MDEP: [[BB_NPH]]:
-; MDEP-NEXT: [[TMP33_PRE:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[TBAA0:![0-9]+]]
+; MDEP-NEXT: [[TMP33_PRE:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0:![0-9]+]]
; MDEP-NEXT: br label %[[FOR_BODY:.*]]
; MDEP: [[FOR_BODY]]:
; MDEP-NEXT: [[TMP33:%.*]] = phi i16 [ 0, %[[FOR_BODY]] ], [ [[TMP33_PRE]], %[[BB_NPH]] ]
; MDEP-NEXT: store i16 [[TMP33]], ptr [[Q]], align 2
-; MDEP-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[TBAA0]]
+; MDEP-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0]]
; MDEP-NEXT: br i1 false, label %[[FOR_BODY_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY]]
; MDEP: [[FOR_BODY_FOR_END_CRIT_EDGE]]:
; MDEP-NEXT: br label %[[FOR_END]]
@@ -31,9 +31,9 @@ define void @test(ptr %P, ptr %Q, i1 %arg) nounwind {
; MSSA: [[BB_NPH]]:
; MSSA-NEXT: br label %[[FOR_BODY:.*]]
; MSSA: [[FOR_BODY]]:
-; MSSA-NEXT: [[TMP33:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[TBAA0:![0-9]+]]
+; MSSA-NEXT: [[TMP33:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0:![0-9]+]]
; MSSA-NEXT: store i16 [[TMP33]], ptr [[Q]], align 2
-; MSSA-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[TBAA0]]
+; MSSA-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0]]
; MSSA-NEXT: br i1 false, label %[[FOR_BODY_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY]]
; MSSA: [[FOR_BODY_FOR_END_CRIT_EDGE]]:
; MSSA-NEXT: br label %[[FOR_END]]
@@ -62,12 +62,12 @@ for.end: ; preds = %for.body, %entry
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}
;.
-; MDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; MDEP: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; MDEP: [[META1]] = !{!"short", [[META2:![0-9]+]]}
; MDEP: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
; MDEP: [[META3]] = !{!"Simple C/C++ TBAA"}
;.
-; MSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; MSSA: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; MSSA: [[META1]] = !{!"short", [[META2:![0-9]+]]}
; MSSA: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
; MSSA: [[META3]] = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/GVN/pr33549.ll b/llvm/test/Transforms/GVN/pr33549.ll
index e0d7712..a8ce37c 100644
--- a/llvm/test/Transforms/GVN/pr33549.ll
+++ b/llvm/test/Transforms/GVN/pr33549.ll
@@ -1,41 +1,42 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=gvn -S < %s | FileCheck %s
@Data = common local_unnamed_addr global [32 x i32] zeroinitializer, align 4
; Function Attrs: norecurse nounwind
define void @testshl() local_unnamed_addr #0 {
-; CHECK-LABEL: @testshl(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[K_031:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC9:%.*]], [[FOR_INC8:%.*]] ]
+; CHECK-LABEL: define void @testshl(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[K_031:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_INC8:.*]] ]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_031]]
; CHECK-NEXT: [[SHR:%.*]] = ashr exact i32 [[SHL]], 1
; CHECK-NEXT: [[CMP229:%.*]] = icmp slt i32 [[SHL]], 64
-; CHECK-NEXT: br i1 [[CMP229]], label [[FOR_BODY3_PREHEADER:%.*]], label [[FOR_INC8]]
-; CHECK: for.body3.preheader:
+; CHECK-NEXT: br i1 [[CMP229]], label %[[FOR_BODY3_PREHEADER:.*]], label %[[FOR_INC8]]
+; CHECK: [[FOR_BODY3_PREHEADER]]:
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SHR]], 2
-; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
-; CHECK: for.body3:
-; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY3]] ], [ [[DIV]], [[FOR_BODY3_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY3:.*]]
+; CHECK: [[FOR_BODY3]]:
+; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY3]] ], [ [[DIV]], %[[FOR_BODY3_PREHEADER]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I_030]], [[SHR]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], ptr @Data, i32 0, i32 [[ADD]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [32 x i32], ptr @Data, i32 0, i32 [[I_030]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, [[TBAA3:!tbaa !.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, [[TBAA3]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i32 [[SUB]], ptr [[ARRAYIDX]], align 4, [[TBAA3]]
+; CHECK-NEXT: store i32 [[SUB]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX4]], align 4, [[TBAA3]]
+; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_030]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I_030]], 15
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC8]]
-; CHECK: for.inc8:
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3]], label %[[FOR_INC8]]
+; CHECK: [[FOR_INC8]]:
; CHECK-NEXT: [[INC9]] = add nuw nsw i32 [[K_031]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC9]], 8
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END10:%.*]], label [[FOR_BODY]]
-; CHECK: for.end10:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END10:.*]], label %[[FOR_BODY]]
+; CHECK: [[FOR_END10]]:
; CHECK-NEXT: ret void
;
entry:
@@ -89,3 +90,9 @@ attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="fa
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/GVN/pr64598.ll b/llvm/test/Transforms/GVN/pr64598.ll
index 902af98..80a9198 100644
--- a/llvm/test/Transforms/GVN/pr64598.ll
+++ b/llvm/test/Transforms/GVN/pr64598.ll
@@ -1,61 +1,61 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=gvn < %s | FileCheck %s
define i32 @main(i64 %x, ptr %d, ptr noalias %p) {
-; CHECK-LABEL: define i32 @main
-; CHECK-SAME: (i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[T1_PRE_PRE_PRE:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
; CHECK-NEXT: [[T3_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T2_PRE_PRE_PRE]], align 8
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T2_PRE_PRE_PRE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY]] ]
-; CHECK-NEXT: br label [[LOOP2:%.*]]
-; CHECK: loop2:
-; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T2_PRE_PRE]], [[LOOP]] ]
-; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ]
-; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], [[LOOP]] ]
-; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], [[LOOP]] ]
-; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ]
-; CHECK-NEXT: br label [[LOOP3:%.*]]
-; CHECK: loop3:
-; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP3_LATCH:%.*]] ], [ [[T2_PRE_PRE25]], [[LOOP2]] ]
-; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ]
-; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ]
-; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ]
-; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ]
-; CHECK-NEXT: [[T78:%.*]] = phi ptr [ [[T7:%.*]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ]
-; CHECK-NEXT: [[T66:%.*]] = phi ptr [ [[T6:%.*]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ]
-; CHECK-NEXT: [[T54:%.*]] = phi ptr [ [[T5:%.*]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], %[[LOOP_LATCH:.*]] ], [ [[T2_PRE_PRE_PRE]], %[[ENTRY]] ]
+; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], %[[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP2:.*]]
+; CHECK: [[LOOP2]]:
+; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE:.*]] ], [ [[T2_PRE_PRE]], %[[LOOP]] ]
+; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], %[[LOOP]] ]
+; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], %[[LOOP]] ]
+; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], %[[LOOP]] ]
+; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP3:.*]]
+; CHECK: [[LOOP3]]:
+; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], %[[LOOP3_LATCH:.*]] ], [ [[T2_PRE_PRE25]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], %[[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], %[[LOOP3_LATCH]] ], [ [[T3_PRE]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], %[[LOOP3_LATCH]] ], [ [[T2_PRE]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], %[[LOOP3_LATCH]] ], [ [[T1_PRE]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T78:%.*]] = phi ptr [ [[T7:%.*]], %[[LOOP3_LATCH]] ], [ [[T3_PRE]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T66:%.*]] = phi ptr [ [[T6:%.*]], %[[LOOP3_LATCH]] ], [ [[T2_PRE]], %[[LOOP2]] ]
+; CHECK-NEXT: [[T54:%.*]] = phi ptr [ [[T5:%.*]], %[[LOOP3_LATCH]] ], [ [[T1_PRE]], %[[LOOP2]] ]
; CHECK-NEXT: [[TOBOOL_NOT2_I:%.*]] = icmp eq i64 [[X]], 0
-; CHECK-NEXT: br i1 false, label [[LOOP3_LOOP3_LATCH_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_I:%.*]]
-; CHECK: loop3.loop3.latch_crit_edge:
-; CHECK-NEXT: br label [[LOOP3_LATCH]]
-; CHECK: for.body.lr.ph.i:
+; CHECK-NEXT: br i1 false, label %[[LOOP3_LOOP3_LATCH_CRIT_EDGE:.*]], label %[[FOR_BODY_LR_PH_I:.*]]
+; CHECK: [[LOOP3_LOOP3_LATCH_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[LOOP3_LATCH]]
+; CHECK: [[FOR_BODY_LR_PH_I]]:
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
; CHECK-NEXT: [[T5_PRE:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[ANYPTR_TBAA0]]
; CHECK-NEXT: [[T7_PRE:%.*]] = load ptr, ptr [[T6_PRE]], align 8
-; CHECK-NEXT: br label [[LOOP3_LATCH]]
-; CHECK: loop3.latch:
-; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T1_PRE10]] = phi ptr [ [[T1_PRE11]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T7]] = phi ptr [ [[T78]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T6]] = phi ptr [ [[T66]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: [[T5]] = phi ptr [ [[T54]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ]
-; CHECK-NEXT: br i1 false, label [[LOOP2_LATCH:%.*]], label [[LOOP3]]
-; CHECK: loop2.latch:
-; CHECK-NEXT: br i1 false, label [[LOOP2_LATCH_LOOP2_CRIT_EDGE]], label [[LOOP_LATCH]]
-; CHECK: loop2.latch.loop2_crit_edge:
-; CHECK-NEXT: br label [[LOOP2]]
-; CHECK: loop.latch:
-; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT: br label [[LOOP]]
+; CHECK-NEXT: br label %[[LOOP3_LATCH]]
+; CHECK: [[LOOP3_LATCH]]:
+; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T1_PRE10]] = phi ptr [ [[T1_PRE11]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T7]] = phi ptr [ [[T78]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T6]] = phi ptr [ [[T66]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: [[T5]] = phi ptr [ [[T54]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ]
+; CHECK-NEXT: br i1 false, label %[[LOOP2_LATCH:.*]], label %[[LOOP3]]
+; CHECK: [[LOOP2_LATCH]]:
+; CHECK-NEXT: br i1 false, label %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP2_LATCH_LOOP2_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[LOOP2]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
+; CHECK-NEXT: br label %[[LOOP]]
;
entry:
br label %loop
@@ -101,3 +101,11 @@ loop.latch:
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"int", !2, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/GVN/tbaa.ll b/llvm/test/Transforms/GVN/tbaa.ll
index 59ace14..bb9b0de 100644
--- a/llvm/test/Transforms/GVN/tbaa.ll
+++ b/llvm/test/Transforms/GVN/tbaa.ll
@@ -1,17 +1,17 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=gvn -S < %s | FileCheck --check-prefixes=CHECK,MDEP %s
; RUN: opt -passes='gvn<memoryssa>' -S < %s | FileCheck --check-prefixes=CHECK,MSSA %s
define i32 @test1(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test1(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test1(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]]
; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]])
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
@@ -25,14 +25,14 @@ define i32 @test1(ptr %p, ptr %q) {
define i32 @test2(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test2(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test2(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -45,14 +45,14 @@ define i32 @test2(ptr %p, ptr %q) {
define i32 @test3(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test3(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test3(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -65,14 +65,14 @@ define i32 @test3(ptr %p, ptr %q) {
define i32 @test4(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test4(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test4(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -85,14 +85,14 @@ define i32 @test4(ptr %p, ptr %q) {
define i32 @test5(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test5(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test5(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -105,14 +105,14 @@ define i32 @test5(ptr %p, ptr %q) {
define i32 @test6(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test6(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test6(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -125,14 +125,14 @@ define i32 @test6(ptr %p, ptr %q) {
define i32 @test7(ptr %p, ptr %q) {
; MDEP-LABEL: define i32 @test7(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test7(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -150,9 +150,9 @@ define i32 @test8(ptr %p, ptr %q) {
;
; MSSA-LABEL: define i32 @test8(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10:![0-9]+]]
+; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10:![0-9]+]]
; MSSA-NEXT: store i32 15, ptr [[P]], align 4
-; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]]
+; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]]
; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -174,9 +174,9 @@ define i32 @test9(ptr %p, ptr %q) {
;
; MSSA-LABEL: define i32 @test9(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]]
+; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]]
; MSSA-NEXT: call void @clobber()
-; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]]
+; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]]
; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -195,14 +195,14 @@ define i32 @test10(ptr %p, ptr %q) {
; and not just the common final access type.
; MDEP-LABEL: define i32 @test10(
; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]]
+; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA10:![0-9]+]]
; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; MDEP-NEXT: ret i32 [[C]]
;
; MSSA-LABEL: define i32 @test10(
; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA13:![0-9]+]]
-; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA17:![0-9]+]]
+; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA13:![0-9]+]]
+; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA17:![0-9]+]]
; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; MSSA-NEXT: ret i32 [[C]]
;
@@ -238,39 +238,39 @@ declare i32 @foo(ptr) readonly
!9 = !{!"yet another root"}
!10 = !{!"node", !9, i64 1}
;.
-; MDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; MDEP: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; MDEP: [[META1]] = !{!"C", [[META2:![0-9]+]]}
; MDEP: [[META2]] = !{!"A", [[META3:![0-9]+]]}
; MDEP: [[META3]] = !{!"tbaa root"}
-; MDEP: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; MDEP: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
; MDEP: [[META5]] = !{!"B", [[META2]]}
-; MDEP: [[TBAA6]] = !{[[META2]], [[META2]], i64 0}
-; MDEP: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; MDEP: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0}
+; MDEP: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
; MDEP: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]}
; MDEP: [[META9]] = !{!"another root"}
-; MDEP: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0}
+; MDEP: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0}
; MDEP: [[META11]] = !{!"struct X", [[META12]], i64 0}
; MDEP: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0}
; MDEP: [[META13]] = !{!"char", [[META3]], i64 0}
;.
-; MSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; MSSA: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; MSSA: [[META1]] = !{!"C", [[META2:![0-9]+]]}
; MSSA: [[META2]] = !{!"A", [[META3:![0-9]+]]}
; MSSA: [[META3]] = !{!"tbaa root"}
-; MSSA: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; MSSA: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
; MSSA: [[META5]] = !{!"B", [[META2]]}
-; MSSA: [[TBAA6]] = !{[[META2]], [[META2]], i64 0}
-; MSSA: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; MSSA: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0}
+; MSSA: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
; MSSA: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]}
; MSSA: [[META9]] = !{!"another root"}
-; MSSA: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 1}
+; MSSA: [[NODE_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 1}
; MSSA: [[META11]] = !{!"node", [[META12:![0-9]+]]}
; MSSA: [[META12]] = !{!"yet another root"}
-; MSSA: [[TBAA13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]], i64 0}
+; MSSA: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]], i64 0}
; MSSA: [[META14]] = !{!"struct X", [[META15]], i64 0}
; MSSA: [[META15]] = !{!"int", [[META16:![0-9]+]], i64 0}
; MSSA: [[META16]] = !{!"char", [[META3]], i64 0}
-; MSSA: [[TBAA17]] = !{[[META18:![0-9]+]], [[META15]], i64 0}
+; MSSA: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META15]], i64 0}
; MSSA: [[META18]] = !{!"struct Y", [[META14]], i64 0}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/Transforms/GVNHoist/hoist-md.ll b/llvm/test/Transforms/GVNHoist/hoist-md.ll
index 26fe475..2ef9bc3 100644
--- a/llvm/test/Transforms/GVNHoist/hoist-md.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist-md.ll
@@ -1,19 +1,19 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -S -passes=gvn-hoist < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @test1(i1 %b, ptr %x) {
-; CHECK-LABEL: define void @test1
-; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i32 2, ptr [[X]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[IF_END:%.*]]
-; CHECK: if.else:
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 2, ptr [[X]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]]
+; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE]]:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -32,19 +32,19 @@ if.end: ; preds = %if.else, %if.then
}
define void @test2(i1 %b, ptr %x) {
-; CHECK-LABEL: define void @test2
-; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK: if.then:
+; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-NEXT: br label [[IF_END:%.*]]
-; CHECK: if.else:
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE]]:
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -65,19 +65,19 @@ if.end: ; preds = %if.else, %if.then
}
define void @test3(i1 %b, ptr %x) {
-; CHECK-LABEL: define void @test3
-; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[X]], i64 1
-; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK: if.then:
+; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-NEXT: br label [[IF_END:%.*]]
-; CHECK: if.else:
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE]]:
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[X]], i64 1
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -98,17 +98,17 @@ if.end: ; preds = %if.else, %if.then
}
define i32 @test4(i1 %b, ptr %y) {
-; CHECK-LABEL: define i32 @test4
-; CHECK-SAME: (i1 [[B:%.*]], ptr [[Y:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test4(
+; CHECK-SAME: i1 [[B:%.*]], ptr [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !range [[RNG3:![0-9]+]]
-; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[RETURN:%.*]]
-; CHECK: if.end:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_END]] ]
+; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[RETURN:.*]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP0]], %[[IF_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
@@ -128,17 +128,17 @@ return: ; preds = %if.end, %if.then
}
define ptr @test5(i1 %b, ptr %y) {
-; CHECK-LABEL: define ptr @test5
-; CHECK-SAME: (i1 [[B:%.*]], ptr [[Y:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define ptr @test5(
+; CHECK-SAME: i1 [[B:%.*]], ptr [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y]], align 4
-; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[RETURN:%.*]]
-; CHECK: if.end:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_END]] ]
+; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[RETURN:.*]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP0]], %[[IF_END]] ]
; CHECK-NEXT: ret ptr [[RETVAL_0]]
;
entry:
@@ -167,8 +167,8 @@ return: ; preds = %if.end, %if.then
!8 = !{i32 3, i32 4}
!9 = !{}
;.
-; CHECK: [[TBAA0]] = !{!1, !1, i64 0}
-; CHECK: [[META1:![0-9]+]] = !{!"omnipotent char", !2, i64 0}
-; CHECK: [[META2:![0-9]+]] = !{!"Simple C++ TBAA"}
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"Simple C++ TBAA"}
; CHECK: [[RNG3]] = !{i32 0, i32 2, i32 3, i32 4}
;.
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index 1c31778..ebc5c0d 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -1,10 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memset_group_to_flat(
; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], i8 4, i64 32, i1 false), !tbaa [[TBAA0:![0-9]+]], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], i8 4, i64 32, i1 false), !tbaa [[A_TBAA0:![0-9]+]], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]]
; CHECK-NEXT: ret void
;
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
@@ -15,7 +15,7 @@ define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32
define amdgpu_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memset_global_to_flat(
; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[GLOBAL_PTR]], i8 4, i64 32, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[GLOBAL_PTR]], i8 4, i64 32, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
@@ -49,7 +49,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -61,7 +61,7 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -73,7 +73,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrs
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
@@ -84,7 +84,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrs
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -96,7 +96,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr a
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr addrspace(1) [[SRC_GLOBAL_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[SRC_GLOBAL_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[SRC_GLOBAL_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr
@@ -108,7 +108,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(
; CHECK-SAME: ptr addrspace(1) [[DEST_GLOBAL_PTR:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i32 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 [[DEST_GLOBAL_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i32 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 [[DEST_GLOBAL_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i32 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr
@@ -159,7 +159,7 @@ define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_n
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_group_flat_to_flat_self(
; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], ptr addrspace(3) align 4 [[GROUP_PTR]], i64 32, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], ptr addrspace(3) align 4 [[GROUP_PTR]], i64 32, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
@@ -170,7 +170,7 @@ define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest
; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -236,7 +236,7 @@ attributes #1 = { argmemonly nounwind }
!7 = distinct !{!7, !5, !"some scope 2"}
!8 = !{i64 0, i64 8, null}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[A_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"A", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"tbaa root"}
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
index f558e35..1d89dd6 100644
--- a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
+++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
@@ -1,25 +1,35 @@
-; RUN: opt < %s -passes=instcombine,mem2reg,simplifycfg -simplifycfg-require-and-preserve-domtree=1 | \
-; RUN: llvm-dis | grep -v store | not grep "i32 1"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -passes=instcombine,mem2reg,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -o - | FileCheck %s
; Test to make sure that instcombine does not accidentally propagate the load
; into the PHI, which would break the program.
define i32 @test(i1 %C) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[X2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[X]], align 4
+; CHECK-NEXT: store i32 2, ptr [[X2]], align 4
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C]], ptr [[X]], ptr [[X2]]
+; CHECK-NEXT: store i32 3, ptr [[X]], align 4
+; CHECK-NEXT: [[TMP_3:%.*]] = load i32, ptr [[SPEC_SELECT]], align 4
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
entry:
- %X = alloca i32 ; <ptr> [#uses=3]
- %X2 = alloca i32 ; <ptr> [#uses=2]
- store i32 1, ptr %X
- store i32 2, ptr %X2
- br i1 %C, label %cond_true.i, label %cond_continue.i
+ %X = alloca i32 ; <ptr> [#uses=3]
+ %X2 = alloca i32 ; <ptr> [#uses=2]
+ store i32 1, ptr %X
+ store i32 2, ptr %X2
+ br i1 %C, label %cond_true.i, label %cond_continue.i
cond_true.i: ; preds = %entry
- br label %cond_continue.i
+ br label %cond_continue.i
cond_continue.i: ; preds = %cond_true.i, %entry
- %mem_tmp.i.0 = phi ptr [ %X, %cond_true.i ], [ %X2, %entry ] ; <ptr> [#uses=1]
- store i32 3, ptr %X
- %tmp.3 = load i32, ptr %mem_tmp.i.0 ; <i32> [#uses=1]
- ret i32 %tmp.3
+ %mem_tmp.i.0 = phi ptr [ %X, %cond_true.i ], [ %X2, %entry ] ; <ptr> [#uses=1]
+ store i32 3, ptr %X
+ %tmp.3 = load i32, ptr %mem_tmp.i.0 ; <i32> [#uses=1]
+ ret i32 %tmp.3
}
-
-
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll b/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll
index f213c0b..c1d12b8 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll
@@ -5,48 +5,6 @@
target triple = "aarch64-unknown-linux-gnu"
-define i64 @cntsb() {
-; CHECK-LABEL: @cntsb(
-; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsb()
-; CHECK-NEXT: ret i64 [[OUT]]
-;
-; CHECK-STREAMING-LABEL: @cntsb(
-; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 4
-; CHECK-STREAMING-NEXT: ret i64 [[OUT]]
-;
- %out = call i64 @llvm.aarch64.sme.cntsb()
- ret i64 %out
-}
-
-define i64 @cntsh() {
-; CHECK-LABEL: @cntsh(
-; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsh()
-; CHECK-NEXT: ret i64 [[OUT]]
-;
-; CHECK-STREAMING-LABEL: @cntsh(
-; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 3
-; CHECK-STREAMING-NEXT: ret i64 [[OUT]]
-;
- %out = call i64 @llvm.aarch64.sme.cntsh()
- ret i64 %out
-}
-
-define i64 @cntsw() {
-; CHECK-LABEL: @cntsw(
-; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsw()
-; CHECK-NEXT: ret i64 [[OUT]]
-;
-; CHECK-STREAMING-LABEL: @cntsw(
-; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 2
-; CHECK-STREAMING-NEXT: ret i64 [[OUT]]
-;
- %out = call i64 @llvm.aarch64.sme.cntsw()
- ret i64 %out
-}
-
define i64 @cntsd() {
; CHECK-LABEL: @cntsd(
; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsd()
@@ -61,8 +19,5 @@ define i64 @cntsd() {
ret i64 %out
}
-declare i64 @llvm.aarch64.sve.cntsb()
-declare i64 @llvm.aarch64.sve.cntsh()
-declare i64 @llvm.aarch64.sve.cntsw()
declare i64 @llvm.aarch64.sve.cntsd()
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll
new file mode 100644
index 0000000..181a417
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x i1> @whilelo_nxv2i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 2 x i1> @whilelo_nxv2i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b)
+ ret <vscale x 2 x i1> %mask
+}
+
+define <vscale x 4 x i1> @whilelo_nxv4i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 4 x i1> @whilelo_nxv4i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b)
+ ret <vscale x 4 x i1> %mask
+}
+
+define <vscale x 8 x i1> @whilelo_nxv8i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @whilelo_nxv8i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b)
+ ret <vscale x 8 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b)
+ ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 2 x i1> @whilelo_nxv2i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 2 x i1> @whilelo_nxv2i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
+ ret <vscale x 2 x i1> %mask
+}
+
+define <vscale x 4 x i1> @whilelo_nxv4i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 4 x i1> @whilelo_nxv4i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b)
+ ret <vscale x 4 x i1> %mask
+}
+
+define <vscale x 8 x i1> @whilelo_nxv8i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @whilelo_nxv8i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b)
+ ret <vscale x 8 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b)
+ ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i64_const() {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i64_const() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 16)
+ ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i32_const() {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i32_const() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 16)
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 0, i32 16)
+ ret <vscale x 16 x i1> %mask
+}
diff --git a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
index 08ce83b..ee3f230 100644
--- a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
+++ b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -S -passes=instcombine -instcombine-lower-dbg-declare=0 | FileCheck %s
; In this example, instcombine wants to turn "local" into an i64, since that's
@@ -24,12 +24,13 @@ target triple = "x86_64-pc-windows-msvc19.11.25508"
%struct.Foo = type { i32, i32 }
define void @f(ptr %p) !dbg !11 {
-; CHECK-LABEL: @f(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[P:%.*]]) !dbg [[DBG11:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; CHECK-NEXT: #dbg_declare(ptr [[LOCAL]], [[META22:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
-; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P:%.*]], align 8, !dbg [[DBG24:![0-9]+]], !tbaa [[TBAA25:![0-9]+]]
-; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA25]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !dbg [[DBG24:![0-9]+]], !tbaa [[LONG_LONG_TBAA25:![0-9]+]]
+; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[LONG_LONG_TBAA25]]
; CHECK-NEXT: call void @escape(ptr nonnull [[LOCAL]]), !dbg [[DBG30:![0-9]+]]
; CHECK-NEXT: ret void, !dbg [[DBG31:![0-9]+]]
;
diff --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll
index f0e0257..2746326 100644
--- a/llvm/test/Transforms/InstCombine/assume-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-align.ll
@@ -175,7 +175,6 @@ define ptr @dont_fold_assume_align_zero_of_loaded_pointer_into_align_metadata(pt
define ptr @redundant_assume_align_1(ptr %p) {
; CHECK-LABEL: @redundant_assume_align_1(
; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 1) ]
; CHECK-NEXT: call void @foo(ptr [[P2]])
; CHECK-NEXT: ret ptr [[P2]]
;
@@ -189,7 +188,6 @@ define ptr @redundant_assume_align_1(ptr %p) {
define ptr @redundant_assume_align_8_via_align_metadata(ptr %p) {
; CHECK-LABEL: @redundant_assume_align_8_via_align_metadata(
; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0:![0-9]+]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 8) ]
; CHECK-NEXT: call void @foo(ptr [[P2]])
; CHECK-NEXT: ret ptr [[P2]]
;
@@ -250,6 +248,19 @@ define ptr @redundant_assume_align_8_via_asume(ptr %p) {
}
declare void @foo(ptr)
+
+; !align must have a constant integer alignment.
+define ptr @assume_load_pointer_result(ptr %p, i64 %align) {
+; CHECK-LABEL: @assume_load_pointer_result(
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 [[ALIGN:%.*]]) ]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+ %p2 = load ptr, ptr %p
+ call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 %align) ]
+ ret ptr %p2
+}
+
;.
; CHECK: [[META0]] = !{i64 8}
;.
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index b9d951d..f9f0ca8 100644
--- a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -24,14 +24,3 @@ define float @src2(float %arg1) {
%v4 = fsub float %v2, %v3
ret float %v4
}
-
-define float @src_sub(float %arg1) {
-; CHECK-LABEL: define float @src_sub(
-; CHECK-SAME: float [[ARG1:%.*]]) {
-; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
-; CHECK-NEXT: ret float [[V3]]
-;
- %v2 = fsub float %arg1, 0.000000e+00
- %v3 = call float @llvm.fabs.f32(float %v2)
- ret float %v3
-}
diff --git a/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll b/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll
new file mode 100644
index 0000000..c642904
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define <vscale x 4 x i1> @rewrite_range_nxv4i1() {
+; CHECK-LABEL: define <vscale x 4 x i1> @rewrite_range_nxv4i1() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 3)
+; CHECK-NEXT: ret <vscale x 4 x i1> [[MASK]]
+;
+ %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4)
+ ret <vscale x 4 x i1> %mask
+}
+
+define <vscale x 16 x i1> @rewrite_range_nxv16i1() {
+; CHECK-LABEL: define <vscale x 16 x i1> @rewrite_range_nxv16i1() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 7)
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 123123, i64 123130)
+ ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 16 x i1> @rewrite_range_nxv16i1_i128() {
+; CHECK-LABEL: define <vscale x 16 x i1> @rewrite_range_nxv16i1_i128() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i128(i128 0, i128 10)
+; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+;
+ %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i128(i128 18446744073709551616, i128 18446744073709551626)
+ ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 4 x i1> @bail_lhs_is_zero() {
+; CHECK-LABEL: define <vscale x 4 x i1> @bail_lhs_is_zero() {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+; CHECK-NEXT: ret <vscale x 4 x i1> [[MASK]]
+;
+ %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+ ret <vscale x 4 x i1> %mask
+}
diff --git a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll
index 67dfe9d..a938921 100644
--- a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll
+++ b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll
@@ -1,12 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
; Check that load to load forwarding works with non aliasing store inbetween.
define i32 @test_load_store_load_combine(ptr, ptr) {
-; CHECK-LABEL: @test_load_store_load_combine(
-; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load_store_load_combine(
+; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[A]] to float
-; CHECK-NEXT: store float [[F]], ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: store float [[F]], ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA4:![0-9]+]]
; CHECK-NEXT: ret i32 [[A]]
;
%a = load i32, ptr %0, align 4, !tbaa !0
@@ -22,3 +23,11 @@ define i32 @test_load_store_load_combine(ptr, ptr) {
!3 = !{!"Simple C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"float", !2, i64 0}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[FLOAT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"float", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 859c9b8..7611299 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -1,13 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
define i32 @test_load_cast_combine_tbaa(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
-; CHECK-LABEL: @test_load_cast_combine_tbaa(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_tbaa(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !tbaa [[SCALAR_TYPE_TBAA0:![0-9]+]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
@@ -18,9 +19,10 @@ entry:
define i32 @test_load_cast_combine_noalias(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
-; CHECK-LABEL: @test_load_cast_combine_noalias(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META3]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_noalias(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META3]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
@@ -33,9 +35,10 @@ define float @test_load_cast_combine_range(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
; would be nice to preserve or update it somehow but this is hard when moving
; between types.
-; CHECK-LABEL: @test_load_cast_combine_range(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-LABEL: define float @test_load_cast_combine_range(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[PTR]], align 4
; CHECK-NEXT: ret float [[L1]]
;
entry:
@@ -46,9 +49,10 @@ entry:
define i32 @test_load_cast_combine_invariant(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
-; CHECK-LABEL: @test_load_cast_combine_invariant(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !invariant.load [[META6:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_invariant(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META6:![0-9]+]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
@@ -60,9 +64,10 @@ entry:
define i32 @test_load_cast_combine_nontemporal(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
; metadata.
-; CHECK-LABEL: @test_load_cast_combine_nontemporal(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !nontemporal [[META7:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_nontemporal(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !nontemporal [[META7:![0-9]+]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
@@ -74,9 +79,10 @@ entry:
define ptr @test_load_cast_combine_align(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves align
; metadata.
-; CHECK-LABEL: @test_load_cast_combine_align(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !align [[META8:![0-9]+]]
+; CHECK-LABEL: define ptr @test_load_cast_combine_align(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !align [[META8:![0-9]+]]
; CHECK-NEXT: ret ptr [[L]]
;
entry:
@@ -87,9 +93,10 @@ entry:
define ptr @test_load_cast_combine_deref(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable
; metadata.
-; CHECK-LABEL: @test_load_cast_combine_deref(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable [[META8]]
+; CHECK-LABEL: define ptr @test_load_cast_combine_deref(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !dereferenceable [[META8]]
; CHECK-NEXT: ret ptr [[L]]
;
entry:
@@ -100,9 +107,10 @@ entry:
define ptr @test_load_cast_combine_deref_or_null(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves
; dereferenceable_or_null metadata.
-; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable_or_null [[META8]]
+; CHECK-LABEL: define ptr @test_load_cast_combine_deref_or_null(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !dereferenceable_or_null [[META8]]
; CHECK-NEXT: ret ptr [[L]]
;
entry:
@@ -113,21 +121,22 @@ entry:
define void @test_load_cast_combine_loop(ptr %src, ptr %dst, i32 %n) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
; metadata.
-; CHECK-LABEL: @test_load_cast_combine_loop(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-LABEL: define void @test_load_cast_combine_loop(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64
-; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64
-; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP1]]
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[SRC_GEP]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]]
; CHECK-NEXT: store i32 [[L1]], ptr [[DST_GEP]], align 4
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]]
-; CHECK: exit:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -149,9 +158,10 @@ exit:
}
define void @test_load_cast_combine_nonnull(ptr %ptr) {
-; CHECK-LABEL: @test_load_cast_combine_nonnull(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !nonnull [[META6]]
+; CHECK-LABEL: define void @test_load_cast_combine_nonnull(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[PTR]], align 8, !nonnull [[META6]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 336
; CHECK-NEXT: store ptr [[P]], ptr [[GEP]], align 8
; CHECK-NEXT: ret void
@@ -164,8 +174,9 @@ entry:
}
define i32 @test_load_cast_combine_noundef(ptr %ptr) {
-; CHECK-LABEL: @test_load_cast_combine_noundef(
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noundef [[META6]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_noundef(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !noundef [[META6]]
; CHECK-NEXT: ret i32 [[L1]]
;
%l = load float, ptr %ptr, !noundef !{}
@@ -175,9 +186,10 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
-; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noalias.addrspace [[META10:![0-9]+]]
+; CHECK-LABEL: define i32 @test_load_cast_combine_noalias_addrspace(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !noalias.addrspace [[META10:![0-9]+]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
@@ -188,11 +200,12 @@ entry:
; Preserve none-UB metadata on loads.
define ptr @preserve_load_metadata_after_select_transform1(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1, !nonnull [[META6]], !align [[META8]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1, !nonnull [[META6]], !align [[META8]]
-; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], ptr [[B_VAL]], ptr [[A_VAL]]
+; CHECK-LABEL: define ptr @preserve_load_metadata_after_select_transform1(
+; CHECK-SAME: i1 [[C:%.*]], ptr dereferenceable(8) [[A:%.*]], ptr dereferenceable(8) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B]], align 1, !nonnull [[META6]], !align [[META8]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A]], align 1, !nonnull [[META6]], !align [[META8]]
+; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C]], ptr [[B_VAL]], ptr [[A_VAL]]
; CHECK-NEXT: ret ptr [[L_SEL]]
;
entry:
@@ -203,11 +216,12 @@ entry:
; Preserve none-UB metadata on loads.
define i32 @preserve_load_metadata_after_select_transform_range(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform_range(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B:%.*]], align 1, !range [[RNG10:![0-9]+]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A:%.*]], align 1, !range [[RNG10]]
-; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], i32 [[B_VAL]], i32 [[A_VAL]]
+; CHECK-LABEL: define i32 @preserve_load_metadata_after_select_transform_range(
+; CHECK-SAME: i1 [[C:%.*]], ptr dereferenceable(8) [[A:%.*]], ptr dereferenceable(8) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 1, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 1, !range [[RNG11]]
+; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C]], i32 [[B_VAL]], i32 [[A_VAL]]
; CHECK-NEXT: ret i32 [[L_SEL]]
;
entry:
@@ -217,10 +231,11 @@ entry:
}
define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]]
+; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -235,10 +250,11 @@ entry:
}
define double @preserve_load_metadata_after_select_transform_metadata_missing_1(ptr %a, ptr %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]]
+; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_1(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -253,10 +269,11 @@ entry:
}
define double @preserve_load_metadata_after_select_transform_metadata_missing_2(ptr %a, ptr %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !llvm.access.group [[META6]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -271,10 +288,11 @@ entry:
}
define double @preserve_load_metadata_after_select_transform_metadata_missing_3(ptr %a, ptr %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]]
+; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_3(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -291,10 +309,11 @@ entry:
; Like preserve_load_metadata_after_select_transform_metadata_missing_3, but
; with different access groups on all loads.
define double @preserve_load_metadata_after_select_transform_metadata_missing_4(ptr %a, ptr %b) {
-; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META3]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META11:![0-9]+]], !noalias [[META11]], !llvm.access.group [[ACC_GRP14:![0-9]+]]
+; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_4(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !alias.scope [[META3]], !noalias [[META3]], !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !alias.scope [[META12:![0-9]+]], !noalias [[META12]], !llvm.access.group [[ACC_GRP15:![0-9]+]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -327,7 +346,7 @@ entry:
!16 = distinct !{!16}
;.
-; CHECK: [[TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0}
+; CHECK: [[SCALAR_TYPE_TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0}
; CHECK: [[LOOP1]] = !{!"scalar type", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"root"}
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
@@ -337,9 +356,10 @@ entry:
; CHECK: [[META7]] = !{i32 1}
; CHECK: [[META8]] = !{i64 8}
; CHECK: [[ACC_GRP9]] = distinct !{}
-; CHECK: [[RNG10]] = !{i32 0, i32 42}
-; CHECK: [[META11]] = !{[[META12:![0-9]+]]}
-; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]}
-; CHECK: [[META13]] = distinct !{[[META13]]}
-; CHECK: [[ACC_GRP14]] = distinct !{}
+; CHECK: [[META10]] = !{i32 5, i32 6}
+; CHECK: [[RNG11]] = !{i32 0, i32 42}
+; CHECK: [[META12]] = !{[[META13:![0-9]+]]}
+; CHECK: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]]}
+; CHECK: [[META14]] = distinct !{[[META14]]}
+; CHECK: [[ACC_GRP15]] = distinct !{}
;.
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll
index 1a57110..0832561 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll
@@ -1,12 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
@g0 = global <4 x i32> zeroinitializer, align 16
define inreg <4 x i32> @mload1(ptr nocapture readonly %a0) #0 {
-; CHECK-LABEL: @mload1(
-; CHECK-NEXT: b0:
-; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr [[A0:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define inreg <4 x i32> @mload1(
+; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[B0:.*:]]
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr [[A0]], align 16, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-NEXT: ret <4 x i32> [[UNMASKEDLOAD]]
;
b0:
@@ -15,9 +16,10 @@ b0:
}
define inreg <4 x i32> @mload2() #0 {
-; CHECK-LABEL: @mload2(
-; CHECK-NEXT: b0:
-; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr @g0, align 16, !tbaa [[TBAA0]]
+; CHECK-LABEL: define inreg <4 x i32> @mload2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:.*:]]
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr @g0, align 16, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[V01:%.*]] = insertelement <4 x i32> [[UNMASKEDLOAD]], i32 0, i64 0
; CHECK-NEXT: ret <4 x i32> [[V01]]
;
@@ -27,9 +29,10 @@ b0:
}
define void @mstore(<4 x i32> %a0, ptr nocapture readonly %a1) #0 {
-; CHECK-LABEL: @mstore(
-; CHECK-NEXT: b0:
-; CHECK-NEXT: store <4 x i32> [[A0:%.*]], ptr [[A1:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @mstore(
+; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr readonly captures(none) [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:.*:]]
+; CHECK-NEXT: store <4 x i32> [[A0]], ptr [[A1]], align 16, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: ret void
;
b0:
@@ -46,3 +49,8 @@ declare void @llvm.masked.store.v4i1.p0(<4 x i32>, ptr, i32, <4 x i1>)
!1 = !{!"omnipotent char", !2, i64 0}
!2 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index e96452a..6fc29bc 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,10 +11,11 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounw
%struct.test1 = type { float }
define void @test1(ptr nocapture %a, ptr nocapture %b) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -25,7 +26,7 @@ entry:
%struct.test2 = type { ptr }
define ptr @test2() {
-; CHECK-LABEL: @test2(
+; CHECK-LABEL: define ptr @test2() {
; CHECK-NEXT: store i1 true, ptr poison, align 1
; CHECK-NEXT: ret ptr poison
;
@@ -36,10 +37,11 @@ define ptr @test2() {
}
define void @test3_multiple_fields(ptr nocapture %a, ptr nocapture %b) {
-; CHECK-LABEL: @test3_multiple_fields(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4
-; CHECK-NEXT: store i64 [[TMP0]], ptr [[A:%.*]], align 4
+; CHECK-LABEL: define void @test3_multiple_fields(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B]], align 4
+; CHECK-NEXT: store i64 [[TMP0]], ptr [[A]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -48,10 +50,11 @@ entry:
}
define void @test4_multiple_copy_first_field(ptr nocapture %a, ptr nocapture %b) {
-; CHECK-LABEL: @test4_multiple_copy_first_field(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test4_multiple_copy_first_field(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4, !tbaa [[FLOAT_TBAA0]]
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -60,10 +63,11 @@ entry:
}
define void @test5_multiple_copy_more_than_first_field(ptr nocapture %a, ptr nocapture %b) {
-; CHECK-LABEL: @test5_multiple_copy_more_than_first_field(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4
+; CHECK-LABEL: define void @test5_multiple_copy_more_than_first_field(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -83,7 +87,7 @@ entry:
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[FLOAT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
;.
diff --git a/llvm/test/Transforms/InstCombine/trunc-lshr.ll b/llvm/test/Transforms/InstCombine/trunc-lshr.ll
index c443b35..0e996e5 100644
--- a/llvm/test/Transforms/InstCombine/trunc-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-lshr.ll
@@ -219,3 +219,77 @@ define i1 @negative_test_fold_ashr(i8 %x) {
%trunc = trunc i8 %ashr to i1
ret i1 %trunc
}
+
+define i1 @fold_lshr_negated_power_of_2(i8 %x) {
+; CHECK-LABEL: define i1 @fold_lshr_negated_power_of_2(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %lshr = lshr i8 -16, %x
+ %trunc = trunc i8 %lshr to i1
+ ret i1 %trunc
+}
+
+define i1 @fold_ashr_negated_power_of_2(i8 %x) {
+; CHECK-LABEL: define i1 @fold_ashr_negated_power_of_2(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %ashr = ashr i8 -16, %x
+ %trunc = trunc i8 %ashr to i1
+ ret i1 %trunc
+}
+
+define i1 @fold_lshr_negated_power_of_2_multi_use(i8 %x) {
+; CHECK-LABEL: define i1 @fold_lshr_negated_power_of_2_multi_use(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i8 -16, [[X]]
+; CHECK-NEXT: call void @use(i8 [[LSHR]])
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %lshr = lshr i8 -16, %x
+ call void @use(i8 %lshr)
+ %trunc = trunc i8 %lshr to i1
+ ret i1 %trunc
+}
+
+define i1 @fold_ashr_negated_power_of_2_multi_use(i8 %x) {
+; CHECK-LABEL: define i1 @fold_ashr_negated_power_of_2_multi_use(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 -16, [[X]]
+; CHECK-NEXT: call void @use(i8 [[ASHR]])
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %ashr = ashr i8 -16, %x
+ call void @use(i8 %ashr)
+ %trunc = trunc i8 %ashr to i1
+ ret i1 %trunc
+}
+
+define i1 @negative_test_fold_lshr_negated_power_of_2(i8 %x) {
+; CHECK-LABEL: define i1 @negative_test_fold_lshr_negated_power_of_2(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i8 -17, [[X]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[LSHR]] to i1
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %lshr = lshr i8 -17, %x
+ %trunc = trunc i8 %lshr to i1
+ ret i1 %trunc
+}
+
+define i1 @negative_test_fold_ashr_negated_power_of_2(i8 %x) {
+; CHECK-LABEL: define i1 @negative_test_fold_ashr_negated_power_of_2(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[ASHR1:%.*]] = lshr i8 -17, [[X]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[ASHR1]] to i1
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %ashr = ashr i8 -17, %x
+ %trunc = trunc i8 %ashr to i1
+ ret i1 %trunc
+}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
index 9de0c59..e9d9ac0 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
@@ -354,8 +354,7 @@ entry:
define <vscale x 16 x i1> @nxv16i1_constexpr_0() {
; CHECK-LABEL: @nxv16i1_constexpr_0(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0)
-; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
;
entry:
%mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0)
diff --git a/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll
new file mode 100644
index 0000000..a3b8e4e
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
+
+define <4 x i1> @foo_v4i1(i32 %a) {
+; CHECK-LABEL: define <4 x i1> @foo_v4i1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret <4 x i1> zeroinitializer
+;
+ %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1(i32 %a, i32 0)
+ ret <4 x i1> %mask
+}
+
+define <vscale x 8 x i1> @foo_nxv8i1(i32 %a) {
+; CHECK-LABEL: define <vscale x 8 x i1> @foo_nxv8i1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+ %mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1(i32 %a, i32 0)
+ ret <vscale x 8 x i1> %mask
+}
diff --git a/llvm/test/Transforms/JumpThreading/ddt-crash3.ll b/llvm/test/Transforms/JumpThreading/ddt-crash3.ll
index b37987b..edaade32 100644
--- a/llvm/test/Transforms/JumpThreading/ddt-crash3.ll
+++ b/llvm/test/Transforms/JumpThreading/ddt-crash3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=jump-threading -verify-dom-info < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -10,19 +10,20 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse noreturn nounwind uwtable
define void @hoge() local_unnamed_addr #0 {
-; CHECK-LABEL: @hoge(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[TMP:%.*]] = load i64, ptr @global, align 8, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-LABEL: define void @hoge(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP:%.*]] = load i64, ptr @global, align 8, !tbaa [[LONG_TBAA1:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP]], 0
-; CHECK-NEXT: br i1 [[TMP2]], label [[BB26:%.*]], label [[BB3:%.*]]
-; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @global.1, align 8, !tbaa [[TBAA1]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB26:.*]], label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @global.1, align 8, !tbaa [[LONG_TBAA1]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[BB26]], label [[BB26]]
-; CHECK: bb26:
-; CHECK-NEXT: br label [[BB1]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[BB26]], label %[[BB26]]
+; CHECK: [[BB26]]:
+; CHECK-NEXT: br label %[[BB1]]
;
bb:
br label %bb1
@@ -56,3 +57,9 @@ attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide-
!2 = !{!"long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[LONG_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"long", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll
index 4749de0..cb10168 100644
--- a/llvm/test/Transforms/JumpThreading/thread-loads.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=jump-threading -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s
@@ -8,23 +8,24 @@ target triple = "i386-apple-darwin7"
; Test that we can thread through the block with the partially redundant load (%2).
; rdar://6402033
define i32 @test1(ptr %P) nounwind {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) @f1() #[[ATTR0:[0-9]+]]
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) @f1() #[[ATTR0]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]]
-; CHECK: bb1.thread:
-; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb1:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]]
+; CHECK: [[BB1_THREAD]]:
+; CHECK-NEXT: store i32 42, ptr [[P]], align 4
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB1]]:
; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOTPR]], 36
-; CHECK-NEXT: br i1 [[TMP2]], label [[BB3]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB3]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 (...) @f2() #[[ATTR0]]
; CHECK-NEXT: ret i32 0
-; CHECK: bb3:
-; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ]
; CHECK-NEXT: ret i32 [[RES_02]]
;
entry:
@@ -59,23 +60,24 @@ declare i32 @f2(...)
; rdar://11039258
define i32 @test2(ptr %P) nounwind {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) @f1() #[[ATTR0]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]]
-; CHECK: bb1.thread:
-; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]]
+; CHECK: [[BB1_THREAD]]:
+; CHECK-NEXT: store i32 42, ptr [[P]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOTPR]], 36
-; CHECK-NEXT: br i1 [[TMP2]], label [[BB3]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB3]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 (...) @f2() #[[ATTR0]]
; CHECK-NEXT: ret i32 0
-; CHECK: bb3:
-; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ]
; CHECK-NEXT: ret i32 [[RES_02]]
;
entry:
@@ -106,17 +108,18 @@ define i32 @test3(ptr %x, i1 %f) {
; as necessary in the predecessors. This is especially tricky because the same
; predecessor ends up with two entries in the PHI node and they must share
; a single cast.
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X:%.*]], align 8
-; CHECK-NEXT: br i1 [[F:%.*]], label [[IF_END57:%.*]], label [[IF_END57]]
-; CHECK: if.end57:
-; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP1]], [[ENTRY]] ]
+; CHECK-LABEL: define i32 @test3(
+; CHECK-SAME: ptr [[X:%.*]], i1 [[F:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
+; CHECK-NEXT: br i1 [[F]], label %[[IF_END57:.*]], label %[[IF_END57]]
+; CHECK: [[IF_END57]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[ENTRY]] ]
; CHECK-NEXT: [[TOBOOL59:%.*]] = icmp eq ptr [[TMP3]], null
-; CHECK-NEXT: br i1 [[TOBOOL59]], label [[RETURN:%.*]], label [[IF_THEN60:%.*]]
-; CHECK: if.then60:
+; CHECK-NEXT: br i1 [[TOBOOL59]], label %[[RETURN:.*]], label %[[IF_THEN60:.*]]
+; CHECK: [[IF_THEN60]]:
; CHECK-NEXT: ret i32 42
-; CHECK: return:
+; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 13
;
entry:
@@ -139,23 +142,24 @@ return:
}
define i32 @test4(ptr %P) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test4(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) @f1()
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
-; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]]
-; CHECK: bb1.thread:
-; CHECK-NEXT: store atomic i32 42, ptr [[P:%.*]] unordered, align 4
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb1:
+; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]]
+; CHECK: [[BB1_THREAD]]:
+; CHECK-NEXT: store atomic i32 42, ptr [[P]] unordered, align 4
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB1]]:
; CHECK-NEXT: [[V2_PR:%.*]] = load atomic i32, ptr [[P]] unordered, align 4
; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2_PR]], 36
-; CHECK-NEXT: br i1 [[V3]], label [[BB3]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[V3]], label %[[BB3]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2()
; CHECK-NEXT: ret i32 0
-; CHECK: bb3:
-; CHECK-NEXT: [[RES_04:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[RES_04:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ]
; CHECK-NEXT: ret i32 [[RES_04]]
;
entry:
@@ -183,23 +187,24 @@ bb3:
define i32 @test5(ptr %P) {
; Negative test
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test5(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) @f1()
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
-; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]]
-; CHECK: bb:
-; CHECK-NEXT: store atomic i32 42, ptr [[P:%.*]] release, align 4
-; CHECK-NEXT: br label [[BB1]]
-; CHECK: bb1:
-; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: store atomic i32 42, ptr [[P]] release, align 4
+; CHECK-NEXT: br label %[[BB1]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4
; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36
-; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2()
; CHECK-NEXT: ret i32 [[RES_0]]
-; CHECK: bb3:
+; CHECK: [[BB3]]:
; CHECK-NEXT: ret i32 [[RES_0]]
;
entry:
@@ -228,23 +233,24 @@ bb3:
define i32 @test6(ptr %P) {
; Negative test
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test6(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) @f1()
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
-; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]]
-; CHECK: bb:
-; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[BB1]]
-; CHECK: bb1:
-; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: store i32 42, ptr [[P]], align 4
+; CHECK-NEXT: br label %[[BB1]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4
; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36
-; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2()
; CHECK-NEXT: ret i32 [[RES_0]]
-; CHECK: bb3:
+; CHECK: [[BB3]]:
; CHECK-NEXT: ret i32 [[RES_0]]
;
entry:
@@ -273,23 +279,24 @@ bb3:
define i32 @test7(ptr %P) {
; Negative test
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test7(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) @f1()
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
-; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]]
-; CHECK: bb:
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[BB1]]
-; CHECK: bb1:
-; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: br label %[[BB1]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4
; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36
-; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]]
-; CHECK: bb2:
+; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2()
; CHECK-NEXT: ret i32 [[RES_0]]
-; CHECK: bb3:
+; CHECK: [[BB3]]:
; CHECK-NEXT: ret i32 [[RES_0]]
;
entry:
@@ -319,10 +326,11 @@ bb3:
; We keep the tbaa and range metadata for the first load, as it dominates the
; second load. Hence we can eliminate the branch.
define void @test8(ptr, ptr, ptr) {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: ret2:
-; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]], !range [[RNG4:![0-9]+]], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]], !noundef [[META10:![0-9]+]]
-; CHECK-NEXT: store i32 [[A]], ptr [[TMP1:%.*]], align 4
+; CHECK-LABEL: define void @test8(
+; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) {
+; CHECK-NEXT: [[RET2:.*:]]
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]], !range [[RNG4:![0-9]+]], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]], !noundef [[META10:![0-9]+]]
+; CHECK-NEXT: store i32 [[A]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]]
; CHECK-NEXT: ret void
;
@@ -344,24 +352,25 @@ ret2:
; we need to remove metadata from the existing load, and add appropriate
; metadata to the newly inserted load.
define void @test9(ptr, ptr, ptr, i1 %c) {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: br i1 [[C:%.*]], label [[D1:%.*]], label [[D2:%.*]]
-; CHECK: d1:
-; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4
-; CHECK-NEXT: br label [[D3:%.*]]
-; CHECK: d2:
+; CHECK-LABEL: define void @test9(
+; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: br i1 [[C]], label %[[D1:.*]], label %[[D2:.*]]
+; CHECK: [[D1]]:
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: br label %[[D3:.*]]
+; CHECK: [[D2]]:
; CHECK-NEXT: [[XXXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]]
-; CHECK-NEXT: [[B_PR:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[D3]]
-; CHECK: d3:
-; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_PR]], [[D2]] ], [ [[A]], [[D1]] ]
-; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[D2]] ], [ [[A]], [[D1]] ]
-; CHECK-NEXT: store i32 [[P]], ptr [[TMP1:%.*]], align 4
+; CHECK-NEXT: [[B_PR:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[D3]]
+; CHECK: [[D3]]:
+; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_PR]], %[[D2]] ], [ [[A]], %[[D1]] ]
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, %[[D2]] ], [ [[A]], %[[D1]] ]
+; CHECK-NEXT: store i32 [[P]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[B]], 8
-; CHECK-NEXT: br i1 [[C2]], label [[RET1:%.*]], label [[RET2:%.*]]
-; CHECK: ret1:
+; CHECK-NEXT: br i1 [[C2]], label %[[RET1:.*]], label %[[RET2:.*]]
+; CHECK: [[RET1]]:
; CHECK-NEXT: ret void
-; CHECK: ret2:
+; CHECK: [[RET2]]:
; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]]
; CHECK-NEXT: ret void
;
@@ -391,27 +400,28 @@ ret2:
}
define i32 @fn_noalias(i1 %c2,ptr noalias %P, ptr noalias %P2) {
-; CHECK-LABEL: @fn_noalias(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[COND1:%.*]]
-; CHECK: cond1:
-; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4
-; CHECK-NEXT: store i64 42, ptr [[P2:%.*]], align 4
+; CHECK-LABEL: define i32 @fn_noalias(
+; CHECK-SAME: i1 [[C2:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label %[[COND1:.*]]
+; CHECK: [[COND1]]:
+; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; CHECK-NEXT: store i64 42, ptr [[P2]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L1]], 0
-; CHECK-NEXT: br i1 [[C]], label [[COND2_THREAD:%.*]], label [[END:%.*]]
-; CHECK: cond2.thread:
+; CHECK-NEXT: br i1 [[C]], label %[[COND2_THREAD:.*]], label %[[END:.*]]
+; CHECK: [[COND2_THREAD]]:
; CHECK-NEXT: call void @fn2(i64 [[L1]])
-; CHECK-NEXT: br label [[COND3:%.*]]
-; CHECK: cond2:
+; CHECK-NEXT: br label %[[COND3:.*]]
+; CHECK: [[COND2]]:
; CHECK-NEXT: [[L2_PR:%.*]] = load i64, ptr [[P]], align 4
; CHECK-NEXT: call void @fn2(i64 [[L2_PR]])
; CHECK-NEXT: [[C3:%.*]] = icmp eq i64 [[L2_PR]], 0
-; CHECK-NEXT: br i1 [[C3]], label [[COND3]], label [[END]]
-; CHECK: cond3:
-; CHECK-NEXT: [[L23:%.*]] = phi i64 [ [[L1]], [[COND2_THREAD]] ], [ [[L2_PR]], [[COND2]] ]
+; CHECK-NEXT: br i1 [[C3]], label %[[COND3]], label %[[END]]
+; CHECK: [[COND3]]:
+; CHECK-NEXT: [[L23:%.*]] = phi i64 [ [[L1]], %[[COND2_THREAD]] ], [ [[L2_PR]], %[[COND2]] ]
; CHECK-NEXT: call void @fn3(i64 [[L23]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret i32 0
;
entry:
@@ -447,43 +457,44 @@ end:
@last = internal unnamed_addr global [65 x ptr] zeroinitializer, align 8
@next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] zeroinitializer, align 8
define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) {
-; CHECK-LABEL: @Search(
-; CHECK-NEXT: cond.true282:
-; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds [65 x i32], ptr @hash_move, i64 0, i64 [[IDXPROM_I:%.*]]
+; CHECK-LABEL: define fastcc i32 @Search(
+; CHECK-SAME: i64 [[IDXPROM_I:%.*]], i64 [[IDXPROM_I89:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT: [[COND_TRUE282:.*:]]
+; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds [65 x i32], ptr @hash_move, i64 0, i64 [[IDXPROM_I]]
; CHECK-NEXT: [[ARRAYIDX307:%.*]] = getelementptr inbounds [65 x i32], ptr @current_move, i64 0, i64 [[IDXPROM_I]]
; CHECK-NEXT: [[ARRAYIDX89:%.*]] = getelementptr inbounds [65 x ptr], ptr @last, i64 0, i64 [[IDXPROM_I]]
; CHECK-NEXT: [[PHASE:%.*]] = getelementptr inbounds [65 x %struct.NEXT_MOVE], ptr @next_status, i64 0, i64 [[IDXPROM_I]], i32 0
-; CHECK-NEXT: switch i32 [[C:%.*]], label [[CLEANUP:%.*]] [
-; CHECK-NEXT: i32 1, label [[SW_BB_I:%.*]]
-; CHECK-NEXT: i32 0, label [[SW_BB21_I:%.*]]
+; CHECK-NEXT: switch i32 [[C]], label %[[CLEANUP:.*]] [
+; CHECK-NEXT: i32 1, label %[[SW_BB_I:.*]]
+; CHECK-NEXT: i32 0, label %[[SW_BB21_I:.*]]
; CHECK-NEXT: ]
-; CHECK: sw.bb.i:
+; CHECK: [[SW_BB_I]]:
; CHECK-NEXT: [[CALL_I62:%.*]] = call fastcc ptr @GenerateCheckEvasions()
; CHECK-NEXT: store ptr [[CALL_I62]], ptr [[ARRAYIDX89]], align 8
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[ARRAYIDX185]], align 4
; CHECK-NEXT: [[TOBOOL_I63:%.*]] = icmp eq i32 [[L2]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_I63]], label [[SW_BB21_I_THREAD:%.*]], label [[IF_THEN_I64:%.*]]
-; CHECK: sw.bb21.i.thread:
+; CHECK-NEXT: br i1 [[TOBOOL_I63]], label %[[SW_BB21_I_THREAD:.*]], label %[[IF_THEN_I64:.*]]
+; CHECK: [[SW_BB21_I_THREAD]]:
; CHECK-NEXT: store i32 10, ptr [[PHASE]], align 8
-; CHECK-NEXT: br label [[DO_BODY_PREHEADER_I67:%.*]]
-; CHECK: if.then.i64:
+; CHECK-NEXT: br label %[[DO_BODY_PREHEADER_I67:.*]]
+; CHECK: [[IF_THEN_I64]]:
; CHECK-NEXT: store i32 7, ptr [[PHASE]], align 8
; CHECK-NEXT: store i32 [[L2]], ptr [[ARRAYIDX307]], align 4
; CHECK-NEXT: [[CALL16_I:%.*]] = call fastcc i32 @ValidMove(i32 [[L2]])
; CHECK-NEXT: [[TOBOOL17_I:%.*]] = icmp eq i32 [[CALL16_I]], 0
-; CHECK-NEXT: br i1 [[TOBOOL17_I]], label [[IF_ELSE_I65:%.*]], label [[CLEANUP]]
-; CHECK: if.else.i65:
+; CHECK-NEXT: br i1 [[TOBOOL17_I]], label %[[IF_ELSE_I65:.*]], label %[[CLEANUP]]
+; CHECK: [[IF_ELSE_I65]]:
; CHECK-NEXT: call void @f65()
-; CHECK-NEXT: br label [[SW_BB21_I]]
-; CHECK: sw.bb21.i:
+; CHECK-NEXT: br label %[[SW_BB21_I]]
+; CHECK: [[SW_BB21_I]]:
; CHECK-NEXT: [[L3_PR:%.*]] = load i32, ptr [[ARRAYIDX185]], align 4
; CHECK-NEXT: store i32 10, ptr [[PHASE]], align 8
; CHECK-NEXT: [[TOBOOL27_I:%.*]] = icmp eq i32 [[L3_PR]], 0
-; CHECK-NEXT: br i1 [[TOBOOL27_I]], label [[DO_BODY_PREHEADER_I67]], label [[CLEANUP]]
-; CHECK: do.body.preheader.i67:
+; CHECK-NEXT: br i1 [[TOBOOL27_I]], label %[[DO_BODY_PREHEADER_I67]], label %[[CLEANUP]]
+; CHECK: [[DO_BODY_PREHEADER_I67]]:
; CHECK-NEXT: call void @f67()
; CHECK-NEXT: ret i32 67
-; CHECK: cleanup:
+; CHECK: [[CLEANUP]]:
; CHECK-NEXT: call void @Cleanup()
; CHECK-NEXT: ret i32 0
;
@@ -543,22 +554,23 @@ declare void @Cleanup()
declare void @f65()
define i32 @fn_SinglePred(i1 %c2,ptr %P) {
-; CHECK-LABEL: @fn_SinglePred(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4
+; CHECK-LABEL: define i32 @fn_SinglePred(
+; CHECK-SAME: i1 [[C2:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L1]], 0
-; CHECK-NEXT: br i1 [[C]], label [[COND3:%.*]], label [[COND1:%.*]]
-; CHECK: cond1:
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[END:%.*]]
-; CHECK: cond2:
-; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], [[COND1]] ]
+; CHECK-NEXT: br i1 [[C]], label %[[COND3:.*]], label %[[COND1:.*]]
+; CHECK: [[COND1]]:
+; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label %[[END:.*]]
+; CHECK: [[COND2]]:
+; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], %[[COND1]] ]
; CHECK-NEXT: call void @fn2(i64 [[L2]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: cond3:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[COND3]]:
; CHECK-NEXT: call void @fn2(i64 [[L1]])
; CHECK-NEXT: call void @fn3(i64 [[L1]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret i32 0
;
@@ -585,24 +597,25 @@ end:
}
define i32 @fn_SinglePredMultihop(i1 %c1, i1 %c2,ptr %P) {
-; CHECK-LABEL: @fn_SinglePredMultihop(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4
+; CHECK-LABEL: define i32 @fn_SinglePredMultihop(
+; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
; CHECK-NEXT: [[C0:%.*]] = icmp eq i64 [[L1]], 0
-; CHECK-NEXT: br i1 [[C0]], label [[COND3:%.*]], label [[COND0:%.*]]
-; CHECK: cond0:
-; CHECK-NEXT: br i1 [[C1:%.*]], label [[COND1:%.*]], label [[END:%.*]]
-; CHECK: cond1:
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[END]]
-; CHECK: cond2:
-; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], [[COND1]] ]
+; CHECK-NEXT: br i1 [[C0]], label %[[COND3:.*]], label %[[COND0:.*]]
+; CHECK: [[COND0]]:
+; CHECK-NEXT: br i1 [[C1]], label %[[COND1:.*]], label %[[END:.*]]
+; CHECK: [[COND1]]:
+; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label %[[END]]
+; CHECK: [[COND2]]:
+; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], %[[COND1]] ]
; CHECK-NEXT: call void @fn2(i64 [[L2]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: cond3:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[COND3]]:
; CHECK-NEXT: call void @fn2(i64 [[L1]])
; CHECK-NEXT: call void @fn3(i64 [[L1]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret i32 0
;
@@ -640,19 +653,20 @@ declare void @fn3(i64)
; store.
;
define i32 @phi_translate_partial_redundant_loads(i32, ptr, ptr) {
-; CHECK-LABEL: @phi_translate_partial_redundant_loads(
-; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP0]], label [[MERGE_THREAD:%.*]], label [[MERGE:%.*]]
-; CHECK: merge.thread:
-; CHECK-NEXT: store i32 1, ptr [[TMP1:%.*]], align 4
-; CHECK-NEXT: br label [[LEFT_X:%.*]]
-; CHECK: merge:
-; CHECK-NEXT: [[NEWLOAD_PR:%.*]] = load i32, ptr [[TMP2:%.*]], align 4
+; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(
+; CHECK-SAME: i32 [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) {
+; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP0]], label %[[MERGE_THREAD:.*]], label %[[MERGE:.*]]
+; CHECK: [[MERGE_THREAD]]:
+; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4
+; CHECK-NEXT: br label %[[LEFT_X:.*]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[NEWLOAD_PR:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[NEWLOAD_PR]], 5
-; CHECK-NEXT: br i1 [[CMP1]], label [[LEFT_X]], label [[RIGHT_X:%.*]]
-; CHECK: left_x:
+; CHECK-NEXT: br i1 [[CMP1]], label %[[LEFT_X]], label %[[RIGHT_X:.*]]
+; CHECK: [[LEFT_X]]:
; CHECK-NEXT: ret i32 20
-; CHECK: right_x:
+; CHECK: [[RIGHT_X]]:
; CHECK-NEXT: ret i32 10
;
%cmp0 = icmp ne i32 %0, 0
@@ -693,7 +707,7 @@ right_x:
!10 = !{!8}
!11 = !{}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
index 3302718..0d32e50 100644
--- a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
+++ b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=licm -S | FileCheck %s
; PR9634
@@ -7,21 +7,21 @@
define void @f() {
; CHECK-LABEL: define void @f() {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[L_87_I:%.*]] = alloca [9 x i16], align 16
-; CHECK-NEXT: [[G_58_PROMOTED:%.*]] = load i32, ptr @g_58, align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[G_58_PROMOTED]], [[ENTRY:%.*]] ], [ [[OR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[INC12:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[G_58_PROMOTED:%.*]] = load i32, ptr @g_58, align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[G_58_PROMOTED]], %[[ENTRY]] ], [ [[OR:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INC12:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[OR]] = or i32 [[TMP31]], 10
; CHECK-NEXT: [[INC]] = add nsw i32 [[INC12]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; CHECK: for.end:
-; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ]
-; CHECK-NEXT: store ptr @g_58, ptr @g_116, align 8, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT: store i32 [[OR_LCSSA]], ptr @g_58, align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ]
+; CHECK-NEXT: store ptr @g_58, ptr @g_116, align 8, !tbaa [[ANYPTR_TBAA4:![0-9]+]]
+; CHECK-NEXT: store i32 [[OR_LCSSA]], ptr @g_58, align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: ret void
;
@@ -51,3 +51,11 @@ for.end: ; preds = %for.inc
!4 = !{!6, !6, i64 0}
!5 = !{!"any pointer", !1}
!6 = !{!"int", !1}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"any pointer", [[META2]]}
+;.
diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll
index 7fd176b..6aafff7 100644
--- a/llvm/test/Transforms/LICM/pr50367.ll
+++ b/llvm/test/Transforms/LICM/pr50367.ll
@@ -1,23 +1,24 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
@e = external dso_local global ptr, align 8
define void @main(i1 %arg, ptr %arg1) {
-; CHECK-LABEL: @main(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP1:%.*]]
-; CHECK: loop1:
-; CHECK-NEXT: br label [[LOOP2:%.*]]
-; CHECK: loop2:
-; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
-; CHECK: loop2.latch:
-; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4
-; CHECK-NEXT: br label [[LOOP2]]
-; CHECK: loop.latch:
-; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT: br label [[LOOP1]]
+; CHECK-LABEL: define void @main(
+; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP1:.*]]
+; CHECK: [[LOOP1]]:
+; CHECK-NEXT: br label %[[LOOP2:.*]]
+; CHECK: [[LOOP2]]:
+; CHECK-NEXT: br i1 [[ARG]], label %[[LOOP2_LATCH:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP2_LATCH]]:
+; CHECK-NEXT: store i32 0, ptr [[ARG1]], align 4
+; CHECK-NEXT: br label %[[LOOP2]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
+; CHECK-NEXT: br label %[[LOOP1]]
;
entry:
br label %loop1
@@ -40,19 +41,20 @@ loop.latch:
}
define void @store_null(i1 %arg) {
-; CHECK-LABEL: @store_null(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP1:%.*]]
-; CHECK: loop1:
-; CHECK-NEXT: br label [[LOOP2:%.*]]
-; CHECK: loop2:
-; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
-; CHECK: loop2.latch:
+; CHECK-LABEL: define void @store_null(
+; CHECK-SAME: i1 [[ARG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP1:.*]]
+; CHECK: [[LOOP1]]:
+; CHECK-NEXT: br label %[[LOOP2:.*]]
+; CHECK: [[LOOP2]]:
+; CHECK-NEXT: br i1 [[ARG]], label %[[LOOP2_LATCH:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP2_LATCH]]:
; CHECK-NEXT: store i32 0, ptr null, align 4
-; CHECK-NEXT: br label [[LOOP2]]
-; CHECK: loop.latch:
-; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT: br label [[LOOP1]]
+; CHECK-NEXT: br label %[[LOOP2]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[INT_TBAA4]]
+; CHECK-NEXT: br label %[[LOOP1]]
;
entry:
br label %loop1
@@ -80,3 +82,11 @@ loop.latch:
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"int", !2, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index bd3960e..3af65df 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 6
; RUN: opt < %s -passes=licm -S | FileCheck %s
; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='require<aa>,require<target-ir>,require<scalar-evolution>,require<opt-remark-emit>,loop-mssa(licm)' -S %s | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@@ -6,19 +6,20 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
@X = global i32 7 ; <ptr> [#uses=4]
define void @test1(i32 %i) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Out:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[OUT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4
; CHECK-NEXT: ret void
;
@@ -39,18 +40,19 @@ Out:
}
define void @test2(i32 %i) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[X1:%.*]] = getelementptr i32, ptr @X, i64 1
; CHECK-NEXT: [[X2:%.*]] = getelementptr i32, ptr @X, i64 1
; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X1]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
-; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], [[LOOP]] ], [ [[X1_PROMOTED]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], %[[LOOP]] ], [ [[X1_PROMOTED]], %[[ENTRY]] ]
; CHECK-NEXT: [[V]] = add i32 [[A1]], 1
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: Exit:
-; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], [[LOOP]] ]
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X1]], align 4
; CHECK-NEXT: ret void
;
@@ -70,14 +72,15 @@ Exit: ; preds = %Loop
}
define void @test3(i32 %i) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
; CHECK-NEXT: [[X:%.*]] = load volatile i32, ptr @X, align 4
; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1
; CHECK-NEXT: store i32 [[X2]], ptr @X, align 4
-; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Out:
+; CHECK-NEXT: br i1 true, label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[OUT]]:
; CHECK-NEXT: ret void
;
br label %Loop
@@ -94,14 +97,15 @@ Out: ; preds = %Loop
; Should not promote this to a register
define void @test3b(i32 %i) {
-; CHECK-LABEL: @test3b(
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
+; CHECK-LABEL: define void @test3b(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1
; CHECK-NEXT: store volatile i32 [[X2]], ptr @X, align 4
-; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Out:
+; CHECK-NEXT: br i1 true, label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[OUT]]:
; CHECK-NEXT: ret void
;
br label %Loop
@@ -119,30 +123,31 @@ Out: ; preds = %Loop
; Should have promoted 'handle2' accesses.
; Should not have promoted offsetx1 loads.
define void @test4(ptr %x, i8 %n) {
-; CHECK-LABEL: @test4(
+; CHECK-LABEL: define void @test4(
+; CHECK-SAME: ptr [[X:%.*]], i8 [[N:%.*]]) {
; CHECK-NEXT: [[HANDLE1:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[HANDLE2:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: store ptr [[X:%.*]], ptr [[HANDLE1]], align 8
+; CHECK-NEXT: store ptr [[X]], ptr [[HANDLE1]], align 8
; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[X]], i64 8
; CHECK-NEXT: [[OFFSETX1:%.*]] = load ptr, ptr [[HANDLE1]], align 8
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: br label [[SUBLOOP:%.*]]
-; CHECK: subloop:
-; CHECK-NEXT: [[NEWOFFSETX21:%.*]] = phi ptr [ [[TMP]], [[LOOP]] ], [ [[NEWOFFSETX2:%.*]], [[SUBLOOP]] ]
-; CHECK-NEXT: [[COUNT:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[NEXTCOUNT:%.*]], [[SUBLOOP]] ]
-; CHECK-NEXT: store i8 [[N:%.*]], ptr [[NEWOFFSETX21]], align 1
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: br label %[[SUBLOOP:.*]]
+; CHECK: [[SUBLOOP]]:
+; CHECK-NEXT: [[NEWOFFSETX21:%.*]] = phi ptr [ [[TMP]], %[[LOOP]] ], [ [[NEWOFFSETX2:%.*]], %[[SUBLOOP]] ]
+; CHECK-NEXT: [[COUNT:%.*]] = phi i8 [ 0, %[[LOOP]] ], [ [[NEXTCOUNT:%.*]], %[[SUBLOOP]] ]
+; CHECK-NEXT: store i8 [[N]], ptr [[NEWOFFSETX21]], align 1
; CHECK-NEXT: [[NEWOFFSETX2]] = getelementptr i8, ptr [[NEWOFFSETX21]], i64 -1
; CHECK-NEXT: [[NEXTCOUNT]] = add i8 [[COUNT]], 1
; CHECK-NEXT: [[INNEREXITCOND:%.*]] = icmp sge i8 [[NEXTCOUNT]], 8
-; CHECK-NEXT: br i1 [[INNEREXITCOND]], label [[INNEREXIT:%.*]], label [[SUBLOOP]]
-; CHECK: innerexit:
-; CHECK-NEXT: [[NEWOFFSETX2_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2]], [[SUBLOOP]] ]
+; CHECK-NEXT: br i1 [[INNEREXITCOND]], label %[[INNEREXIT:.*]], label %[[SUBLOOP]]
+; CHECK: [[INNEREXIT]]:
+; CHECK-NEXT: [[NEWOFFSETX2_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2]], %[[SUBLOOP]] ]
; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[OFFSETX1]], align 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: [[NEWOFFSETX2_LCSSA_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2_LCSSA]], [[INNEREXIT]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[NEWOFFSETX2_LCSSA_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2_LCSSA]], %[[INNEREXIT]] ]
; CHECK-NEXT: store ptr [[NEWOFFSETX2_LCSSA_LCSSA]], ptr [[HANDLE2]], align 8
; CHECK-NEXT: ret void
;
@@ -177,20 +182,21 @@ exit:
}
define void @test5(i32 %i, ptr noalias %P2) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define void @test5(
+; CHECK-SAME: i32 [[I:%.*]], ptr noalias [[P2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1
-; CHECK-NEXT: store atomic ptr @X, ptr [[P2:%.*]] monotonic, align 8
+; CHECK-NEXT: store atomic ptr @X, ptr [[P2]] monotonic, align 8
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Out:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[OUT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4
; CHECK-NEXT: ret void
;
@@ -217,28 +223,29 @@ Out:
; PR14753 - Preserve TBAA tags when promoting values in a loop.
define void @test6(i32 %n, ptr nocapture %a, ptr %gi) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i32 0, ptr [[GI:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[GI_PROMOTED:%.*]] = load i32, ptr [[GI]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[GI_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[STOREMERGE2:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC]], [[FOR_BODY]] ]
+; CHECK-LABEL: define void @test6(
+; CHECK-SAME: i32 [[N:%.*]], ptr captures(none) [[A:%.*]], ptr [[GI:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 0, ptr [[GI]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[GI_PROMOTED:%.*]] = load i32, ptr [[GI]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[GI_PROMOTED]], %[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[STOREMERGE2:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE2]] to i64
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA4:![0-9]+]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[INC1]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
-; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr [[GI]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]]
+; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], %[[FOR_BODY]] ]
+; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr [[GI]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -273,21 +280,21 @@ declare void @capture(ptr)
; We can promote even if opaque may throw.
define i32 @test7() {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test7() {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]])
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4
; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: ret i32 [[RET]]
@@ -314,27 +321,27 @@ exit:
; Hoist the load even if we cannot sink the store, since the store is really
; control-flow dependent.
define i32 @test7bad() {
-; CHECK-LABEL: @test7bad(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test7bad() {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X21:%.*]], %[[ELSE:.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE]] ]
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
-; CHECK: if:
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]]
+; CHECK: [[IF]]:
; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[ELSE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], %[[IF]] ], [ [[X22]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: ret i32 [[RET]]
;
@@ -367,22 +374,22 @@ exit:
; opaque() may throw, we can still promote - the load not being guaranteed
; doesn't block us, because %local is always dereferenceable.
define i32 @test8() {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test8() {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[THROWAWAY:%.*]] = call i32 @opaque(i32 [[J]])
; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]])
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4
; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: ret i32 [[RET]]
@@ -412,27 +419,27 @@ exit:
; control flow, we can only promote if the pointer is otherwise known to be
; dereferenceable
define i32 @test9() {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test9() {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[ELSE:.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE]] ]
; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
-; CHECK: if:
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[X2]] = phi i32 [ 0, [[LOOP]] ], [ [[X21]], [[IF]] ]
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: br label %[[ELSE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[X2]] = phi i32 [ 0, %[[LOOP]] ], [ [[X21]], %[[IF]] ]
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[ELSE]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[ELSE]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4
; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: ret i32 [[RET]]
@@ -465,27 +472,28 @@ exit:
}
define i32 @test9bad(i32 %i) {
-; CHECK-LABEL: @test9bad(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test9bad(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
-; CHECK-NEXT: [[NOTDEREF:%.*]] = getelementptr i32, ptr [[LOCAL]], i32 [[I:%.*]]
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT: [[NOTDEREF:%.*]] = getelementptr i32, ptr [[LOCAL]], i32 [[I]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE:.*]] ]
; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
-; CHECK: if:
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]]
+; CHECK: [[IF]]:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[NOTDEREF]], align 4
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[X]], [[IF]] ]
+; CHECK-NEXT: br label %[[ELSE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[X]], %[[IF]] ]
; CHECK-NEXT: store i32 [[X2]], ptr [[NOTDEREF]], align 4
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[NOTDEREF]], align 4
; CHECK-NEXT: ret i32 [[RET]]
;
@@ -518,19 +526,20 @@ exit:
}
define void @test10(i32 %i) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define void @test10(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[X_PROMOTED:%.*]] = load atomic i32, ptr @X unordered, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Out:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[OUT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ]
; CHECK-NEXT: store atomic i32 [[X2_LCSSA]], ptr @X unordered, align 4
; CHECK-NEXT: ret void
;
@@ -555,26 +564,27 @@ Out:
; Early exit is known not to be taken on first iteration and thus doesn't
; effect whether load is known to execute.
define void @test11(i32 %i) {
-; CHECK-LABEL: @test11(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define void @test11(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: Loop:
-; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BODY:%.*]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[BODY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[BODY:.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[BODY]] ]
; CHECK-NEXT: [[EARLY_TEST:%.*]] = icmp ult i32 [[J]], 32
-; CHECK-NEXT: br i1 [[EARLY_TEST]], label [[BODY]], label [[EARLY:%.*]]
-; CHECK: body:
+; CHECK-NEXT: br i1 [[EARLY_TEST]], label %[[BODY]], label %[[EARLY:.*]]
+; CHECK: [[BODY]]:
; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]]
-; CHECK: Early:
-; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]]
+; CHECK: [[EARLY]]:
+; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[X21_LCSSA]], ptr @X, align 4
; CHECK-NEXT: ret void
-; CHECK: Out:
-; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[BODY]] ]
+; CHECK: [[OUT]]:
+; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[BODY]] ]
; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4
; CHECK-NEXT: ret void
;
@@ -603,21 +613,22 @@ Out:
define i8 @test_hoistable_existing_load_sinkable_store_writeonly(ptr dereferenceable(8) %ptr, i8 %start) writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_hoistable_existing_load_sinkable_store_writeonly(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR:%.*]], align 1
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[INC1:%.*]] = phi i8 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ]
+; CHECK-LABEL: define i8 @test_hoistable_existing_load_sinkable_store_writeonly(
+; CHECK-SAME: ptr dereferenceable(8) [[PTR:%.*]], i8 [[START:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[INC1:%.*]] = phi i8 [ [[PTR_PROMOTED]], %[[ENTRY]] ], [ [[INC1]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ [[START]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: store i8 [[INC1]], ptr [[PTR]], align 1
; CHECK-NEXT: [[ADD]] = add i8 [[I]], [[INC1]]
-; CHECK-NEXT: br label [[LOOP_HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i8 [ [[I]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i8 [ [[I]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: ret i8 [[I_LCSSA]]
;
entry:
@@ -644,20 +655,21 @@ exit:
; Test case for PR51248.
define void @test_sink_store_only() writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_sink_store_only(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ]
+; CHECK-LABEL: define void @test_sink_store_only(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3
; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4
-; CHECK-NEXT: br label [[LOOP_HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr @glb, align 1
; CHECK-NEXT: ret void
;
@@ -681,21 +693,22 @@ exit:
define void @test_sink_store_to_local_object_only_loop_must_execute() writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_must_execute(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_sink_store_to_local_object_only_loop_must_execute(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3
; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4
-; CHECK-NEXT: br label [[LOOP_HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
@@ -722,22 +735,23 @@ exit:
; pre-header. Make sure the writeonly attribute is dropped.
define void @test_sink_store_to_local_object_only_loop_may_not_execute(i8 %n) writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_may_not_execute(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_sink_store_to_local_object_only_loop_may_not_execute(
+; CHECK-SAME: i8 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3
; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4
-; CHECK-NEXT: br label [[LOOP_HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
@@ -764,22 +778,23 @@ declare dereferenceable(8) noalias ptr @alloc_writeonly() writeonly
define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(i8 %n) writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(
+; CHECK-SAME: i8 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[A:%.*]] = call noalias dereferenceable(8) ptr @alloc_writeonly()
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3
; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4
-; CHECK-NEXT: br label [[LOOP_HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
@@ -804,17 +819,18 @@ exit:
define void @test_sink_store_only_no_phi_needed() writeonly {
; CHECK: Function Attrs: memory(write)
-; CHECK-LABEL: @test_sink_store_only_no_phi_needed(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-LABEL: define void @test_sink_store_only_no_phi_needed(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4
; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[I]], 3
; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: [[DIV_LCSSA:%.*]] = phi i8 [ [[DIV]], [[LOOP]] ]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DIV_LCSSA:%.*]] = phi i8 [ [[DIV]], %[[LOOP]] ]
; CHECK-NEXT: store i8 [[DIV_LCSSA]], ptr @glb, align 1
; CHECK-NEXT: ret void
;
@@ -834,28 +850,29 @@ exit:
}
define void @sink_store_lcssa_phis(ptr %ptr, i1 %c) {
-; CHECK-LABEL: @sink_store_lcssa_phis(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP_1_HEADER:%.*]]
-; CHECK: loop.1.header:
-; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
-; CHECK: loop.2.header:
-; CHECK-NEXT: br i1 false, label [[LOOP_3_HEADER_PREHEADER:%.*]], label [[LOOP_1_LATCH:%.*]]
-; CHECK: loop.3.header.preheader:
-; CHECK-NEXT: br label [[LOOP_3_HEADER:%.*]]
-; CHECK: loop.3.header:
-; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], [[LOOP_3_LATCH:%.*]] ], [ poison, [[LOOP_3_HEADER_PREHEADER]] ]
-; CHECK-NEXT: [[I_1]] = phi i32 [ 1, [[LOOP_3_LATCH]] ], [ 0, [[LOOP_3_HEADER_PREHEADER]] ]
-; CHECK-NEXT: br i1 true, label [[LOOP_3_LATCH]], label [[LOOP_2_LATCH:%.*]]
-; CHECK: loop.3.latch:
-; CHECK-NEXT: br label [[LOOP_3_HEADER]]
-; CHECK: loop.2.latch:
-; CHECK-NEXT: [[I_11_LCSSA:%.*]] = phi i32 [ [[I_11]], [[LOOP_3_HEADER]] ]
-; CHECK-NEXT: store i32 [[I_11_LCSSA]], ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: br label [[LOOP_2_HEADER]]
-; CHECK: loop.1.latch:
-; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP_1_HEADER]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-LABEL: define void @sink_store_lcssa_phis(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP_1_HEADER:.*]]
+; CHECK: [[LOOP_1_HEADER]]:
+; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]]
+; CHECK: [[LOOP_2_HEADER]]:
+; CHECK-NEXT: br i1 false, label %[[LOOP_3_HEADER_PREHEADER:.*]], label %[[LOOP_1_LATCH:.*]]
+; CHECK: [[LOOP_3_HEADER_PREHEADER]]:
+; CHECK-NEXT: br label %[[LOOP_3_HEADER:.*]]
+; CHECK: [[LOOP_3_HEADER]]:
+; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], %[[LOOP_3_LATCH:.*]] ], [ poison, %[[LOOP_3_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[I_1]] = phi i32 [ 1, %[[LOOP_3_LATCH]] ], [ 0, %[[LOOP_3_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_3_LATCH]], label %[[LOOP_2_LATCH:.*]]
+; CHECK: [[LOOP_3_LATCH]]:
+; CHECK-NEXT: br label %[[LOOP_3_HEADER]]
+; CHECK: [[LOOP_2_LATCH]]:
+; CHECK-NEXT: [[I_11_LCSSA:%.*]] = phi i32 [ [[I_11]], %[[LOOP_3_HEADER]] ]
+; CHECK-NEXT: store i32 [[I_11_LCSSA]], ptr [[PTR]], align 4
+; CHECK-NEXT: br label %[[LOOP_2_HEADER]]
+; CHECK: [[LOOP_1_LATCH]]:
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_1_HEADER]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -886,18 +903,19 @@ exit:
}
define void @cond_store_writable_dereferenceable(ptr noalias writable dereferenceable(4) %ptr) {
-; CHECK-LABEL: @cond_store_writable_dereferenceable(
-; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], [[LOOP_LATCH:%.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ]
+; CHECK-LABEL: define void @cond_store_writable_dereferenceable(
+; CHECK-SAME: ptr noalias writable dereferenceable(4) [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[V_INC1]], 10
-; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[V_INC]] = add i32 [[V_INC1]], 1
-; CHECK-NEXT: br label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], %[[LOOP]] ]
; CHECK-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
@@ -918,18 +936,19 @@ exit:
}
define void @cond_store_writable_not_sufficiently_dereferenceable(ptr noalias writable dereferenceable(2) %ptr) {
-; CHECK-LABEL: @cond_store_writable_not_sufficiently_dereferenceable(
-; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], [[LOOP_LATCH:%.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ]
+; CHECK-LABEL: define void @cond_store_writable_not_sufficiently_dereferenceable(
+; CHECK-SAME: ptr noalias writable dereferenceable(2) [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[V_INC1]], 10
-; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
-; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[V_INC]] = add i32 [[V_INC1]], 1
; CHECK-NEXT: store i32 [[V_INC]], ptr [[PTR]], align 4
-; CHECK-NEXT: br label [[LOOP]]
-; CHECK: exit:
+; CHECK-NEXT: br label %[[LOOP]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
br label %loop
@@ -954,3 +973,11 @@ exit:
!3 = !{!5, !5, i64 0}
!4 = !{!"int", !1}
!5 = !{!"float", !1}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[FLOAT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"float", [[META2]]}
+;.
diff --git a/llvm/test/Transforms/LICM/variant-aainfo.ll b/llvm/test/Transforms/LICM/variant-aainfo.ll
index 1e2a33e..4eac3f2 100644
--- a/llvm/test/Transforms/LICM/variant-aainfo.ll
+++ b/llvm/test/Transforms/LICM/variant-aainfo.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -S -passes=licm | FileCheck %s
; See https://discourse.llvm.org/t/rfc-dont-merge-memory-locations-in-aliassettracker/73336
@@ -8,21 +8,21 @@
define void @_Z4testP1S(ptr %s) {
; CHECK-LABEL: define void @_Z4testP1S(
; CHECK-SAME: ptr [[S:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[S_PROMOTED:%.*]] = load ptr, ptr [[S]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[ADD_PTR_I_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_I:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: store ptr [[ADD_PTR_I_LCSSA]], ptr [[S]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_PROMOTED:%.*]] = load ptr, ptr [[S]], align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP:.*]]:
+; CHECK-NEXT: [[ADD_PTR_I_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_I:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: store ptr [[ADD_PTR_I_LCSSA]], ptr [[S]], align 4, !tbaa [[ANYPTR_TBAA0]]
; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[ADD_PTR_I1:%.*]] = phi ptr [ [[S_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD_PTR_I]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: store i32 [[I_05]], ptr [[ADD_PTR_I1]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[ADD_PTR_I1:%.*]] = phi ptr [ [[S_PROMOTED]], %[[ENTRY]] ], [ [[ADD_PTR_I]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: store i32 [[I_05]], ptr [[ADD_PTR_I1]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
; CHECK-NEXT: [[ADD_PTR_I]] = getelementptr inbounds i32, ptr [[ADD_PTR_I1]], i32 1
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 100
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
entry:
br label %for.body
@@ -50,10 +50,10 @@ for.body: ; preds = %entry, %for.body
!6 = !{!"int", !3, i64 0}
!7 = !{!2, !2, i64 0}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
;.
diff --git a/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll
index 881931e0..218b7f4 100644
--- a/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll
+++ b/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll
@@ -1,21 +1,22 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="loop-idiom" < %s -S | FileCheck %s
define void @looper(ptr nocapture %out) {
-; CHECK-LABEL: @looper(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
-; CHECK: for.body4:
-; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
+; CHECK-LABEL: define void @looper(
+; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[DOUBLE_TBAA0:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]]
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
;
entry:
@@ -38,20 +39,21 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3
define void @looperBadMerge(ptr nocapture %out) {
-; CHECK-LABEL: @looperBadMerge(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
-; CHECK: for.body4:
-; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
+; CHECK-LABEL: define void @looperBadMerge(
+; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[CHAR_TBAA4:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]]
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
;
entry:
@@ -73,20 +75,21 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3
}
define void @looperGoodMerge(ptr nocapture %out) {
-; CHECK-LABEL: @looperGoodMerge(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16
+; CHECK-LABEL: define void @looperGoodMerge(
+; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false)
-; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
-; CHECK: for.body4:
-; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]]
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
;
entry:
@@ -114,3 +117,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3
!6 = !{!"double", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
+;.
+; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
index c59f7d9..cee8c8a 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
@@ -18,7 +18,8 @@ define i64 @test_duplicated_phis(i64 noundef %N) {
; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP4]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP5]], 1
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index 89ce667..6f48c41 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
@@ -6,137 +6,138 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
; Tests for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; }
define void @test1(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0
-; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0
+; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.outer.preheader:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
-; CHECK: for.outer.preheader.new:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
; CHECK-NEXT: [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[ADD8_1:%.*]] = add nuw nsw i32 [[I]], 2
; CHECK-NEXT: [[ADD8_2:%.*]] = add nuw nsw i32 [[I]], 3
; CHECK-NEXT: [[ADD8_3]] = add nuw i32 [[I]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[ADD]] = add i32 [[TMP2]], [[SUM]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_1]] = add i32 [[TMP3]], [[SUM_1]]
; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_2]] = add i32 [[TMP4]], [[SUM_2]]
; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP5]], [[SUM_3]]
; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_1]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: for.end.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
-; CHECK: for.end.loopexit.unr-lcssa:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[TMP6]], [[SUM_EPIL]]
; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK: for.outer.epil.1:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]]
-; CHECK: for.inner.epil.1:
-; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]]
+; CHECK: [[FOR_OUTER_EPIL_1]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]]
+; CHECK: [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[TMP7]], [[SUM_EPIL_1]]
; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
-; CHECK: for.latch.epil.1:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
+; CHECK: [[FOR_LATCH_EPIL_1]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD8_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.outer.epil.2:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]]
-; CHECK: for.inner.epil.2:
-; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_OUTER_EPIL_2]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK: [[FOR_INNER_EPIL_2]]:
+; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[TMP8]], [[SUM_EPIL_2]]
; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
-; CHECK: for.latch.epil.2:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
+; CHECK: [[FOR_LATCH_EPIL_2]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL_1]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -181,144 +182,145 @@ for.end:
; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; }
; A[i] load/store dependency should not block unroll-and-jam
define void @test2(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0
-; CHECK-NEXT: [[CMP125:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0
+; CHECK-NEXT: [[CMP125:%.*]] = icmp ne i32 [[I]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP125]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END10:%.*]]
-; CHECK: for.outer.preheader:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END10:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END10_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
-; CHECK: for.outer.preheader.new:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_1:%.*]] = add nuw nsw i32 [[I]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_1]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_2:%.*]] = add nuw nsw i32 [[I]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_2]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_3]] = add nuw i32 [[I]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP2]], [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[TMP3]], [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ [[TMP4]], [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ [[TMP5]], [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP2]], %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[TMP3]], %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ [[TMP4]], %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ [[TMP5]], %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD]] = add i32 [[TMP6]], [[SUM]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_1]] = add i32 [[TMP7]], [[SUM_1]]
; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_2]] = add i32 [[TMP8]], [[SUM_2]]
; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP9]], [[SUM_3]]
; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK: for.end10.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT_UNR_LCSSA]]
-; CHECK: for.end10.loopexit.unr-lcssa:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT_UNR_LCSSA]]
+; CHECK: [[FOR_END10_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END10_LOOPEXIT:%.*]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END10_LOOPEXIT:.*]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ [[TMP10]], [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ [[TMP10]], %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[TMP11]], [[SUM_EPIL]]
; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK: for.outer.epil.1:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA:.*]]
+; CHECK: [[FOR_OUTER_EPIL_1]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL]]
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]]
-; CHECK: for.inner.epil.1:
-; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP12]], [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]]
+; CHECK: [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP12]], %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[TMP13]], [[SUM_EPIL_1]]
; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
-; CHECK: for.latch.epil.1:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
+; CHECK: [[FOR_LATCH_EPIL_1]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.outer.epil.2:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_OUTER_EPIL_2]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL_1]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]]
-; CHECK: for.inner.epil.2:
-; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ [[TMP14]], [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK: [[FOR_INNER_EPIL_2]]:
+; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ [[TMP14]], %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[TMP15]], [[SUM_EPIL_2]]
; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
-; CHECK: for.latch.epil.2:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.end10.loopexit.epilog-lcssa:
-; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT]]
-; CHECK: for.end10.loopexit:
-; CHECK-NEXT: br label [[FOR_END10]]
-; CHECK: for.end10:
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
+; CHECK: [[FOR_LATCH_EPIL_2]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT]]
+; CHECK: [[FOR_END10_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END10]]
+; CHECK: [[FOR_END10]]:
; CHECK-NEXT: ret void
;
entry:
@@ -363,61 +365,62 @@ for.end10:
; Tests Complete unroll-and-jam of the outer loop
define void @test3(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]]
-; CHECK: for.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]]
+; CHECK: [[FOR_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SUM]], 10
; CHECK-NEXT: [[ADD]] = sub i32 [[SUB]], [[TMP0]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[SUB_1:%.*]] = add i32 [[SUM_1]], 10
; CHECK-NEXT: [[ADD_1]] = sub i32 [[SUB_1]], [[TMP1]]
; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[SUB_2:%.*]] = add i32 [[SUM_2]], 10
; CHECK-NEXT: [[ADD_2]] = sub i32 [[SUB_2]], [[TMP2]]
; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[SUB_3:%.*]] = add i32 [[SUM_3]], 10
; CHECK-NEXT: [[ADD_3]] = sub i32 [[SUB_3]], [[TMP3]]
; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH:%.*]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH:.*]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
-; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
-; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
-; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -456,31 +459,32 @@ for.end:
; Tests Complete unroll-and-jam with a trip count of 1
define void @test4(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]]
-; CHECK: for.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test4(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]]
+; CHECK: [[FOR_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SUM]], 10
; CHECK-NEXT: [[ADD]] = sub i32 [[SUB]], [[TMP0]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_LATCH:%.*]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
-; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_LATCH:.*]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
+; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -520,47 +524,47 @@ for.end:
; Multiple SubLoopBlocks
@a = hidden global [1 x i32] zeroinitializer, align 4
define i32 @test5() #0 {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[INC8_SINK15:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC8:%.*]], [[FOR_INC_1:%.*]] ]
-; CHECK-NEXT: [[INC8_SINK15_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC8_1:%.*]], [[FOR_INC_1]] ]
-; CHECK-NEXT: br label [[FOR_INNER2:%.*]]
-; CHECK: for.inner2:
+; CHECK-LABEL: define i32 @test5() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[INC8_SINK15:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC8:%.*]], %[[FOR_INC_1:.*]] ]
+; CHECK-NEXT: [[INC8_SINK15_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC8_1:%.*]], %[[FOR_INC_1]] ]
+; CHECK-NEXT: br label %[[FOR_INNER2:.*]]
+; CHECK: [[FOR_INNER2]]:
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr @a, align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[L1]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND4:%.*]], label [[FOR_INC:%.*]]
-; CHECK: for.cond4:
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[FOR_COND4:.*]], label %[[FOR_INC:.*]]
+; CHECK: [[FOR_COND4]]:
; CHECK-NEXT: [[L0:%.*]] = load i32, ptr getelementptr inbounds ([1 x i32], ptr @a, i32 1, i32 0), align 4
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[L0]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_1]], label [[FOR_COND4A:%.*]], label [[FOR_INC]]
-; CHECK: for.cond4a:
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: for.inc:
+; CHECK-NEXT: br i1 [[TOBOOL_1]], label %[[FOR_COND4A:.*]], label %[[FOR_INC]]
+; CHECK: [[FOR_COND4A]]:
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
; CHECK-NEXT: [[INC8]] = add nuw nsw i32 [[INC8_SINK15]], 1
; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr @a, align 4
; CHECK-NEXT: [[TOBOOL_11:%.*]] = icmp eq i32 [[L1_1]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_11]], label [[FOR_COND4_1:%.*]], label [[FOR_INC_1]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[DOTLCSSA_1:%.*]] = phi i32 [ [[L2_1:%.*]], [[FOR_INC_1]] ]
-; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end:
-; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_1]], [[FOR_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[TOBOOL_11]], label %[[FOR_COND4_1:.*]], label %[[FOR_INC_1]]
+; CHECK: [[FOR_LATCH:.*]]:
+; CHECK-NEXT: [[DOTLCSSA_1:%.*]] = phi i32 [ [[L2_1:%.*]], %[[FOR_INC_1]] ]
+; CHECK-NEXT: br label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_1]], %[[FOR_LATCH]] ]
; CHECK-NEXT: ret i32 0
-; CHECK: for.cond4.1:
+; CHECK: [[FOR_COND4_1]]:
; CHECK-NEXT: [[L0_1:%.*]] = load i32, ptr getelementptr inbounds ([1 x i32], ptr @a, i32 1, i32 0), align 4
; CHECK-NEXT: [[TOBOOL_1_1:%.*]] = icmp eq i32 [[L0_1]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_1_1]], label [[FOR_COND4A_1:%.*]], label [[FOR_INC_1]]
-; CHECK: for.cond4a.1:
-; CHECK-NEXT: br label [[FOR_INC_1]]
-; CHECK: for.inc.1:
-; CHECK-NEXT: [[L2_1]] = phi i32 [ 0, [[FOR_INC]] ], [ 1, [[FOR_COND4_1]] ], [ 2, [[FOR_COND4A_1]] ]
+; CHECK-NEXT: br i1 [[TOBOOL_1_1]], label %[[FOR_COND4A_1:.*]], label %[[FOR_INC_1]]
+; CHECK: [[FOR_COND4A_1]]:
+; CHECK-NEXT: br label %[[FOR_INC_1]]
+; CHECK: [[FOR_INC_1]]:
+; CHECK-NEXT: [[L2_1]] = phi i32 [ 0, %[[FOR_INC]] ], [ 1, %[[FOR_COND4_1]] ], [ 2, %[[FOR_COND4A_1]] ]
; CHECK-NEXT: [[INC8_1]] = add nuw nsw i32 [[INC8_SINK15_1]], 1
; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC8_1]], 3
-; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_LATCH]], label [[FOR_INNER]]
+; CHECK-NEXT: br i1 [[EXITCOND_1]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
;
entry:
br label %for.outer
@@ -608,57 +612,57 @@ for.end:
; Test odd uses of phi nodes
@f = hidden global i32 0, align 4
define i32 @test6() #0 {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[F_PROMOTED10:%.*]] = load i32, ptr @f, align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br i1 false, label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[INC5_SINK9:%.*]] = phi i32 [ 2, [[ENTRY_NEW]] ], [ [[INC5_3:%.*]], [[FOR_LATCH:%.*]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-LABEL: define i32 @test6() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[F_PROMOTED10:%.*]] = load i32, ptr @f, align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br i1 false, label %[[FOR_END_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[INC5_SINK9:%.*]] = phi i32 [ 2, %[[ENTRY_NEW]] ], [ [[INC5_3:%.*]], %[[FOR_LATCH:.*]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
; CHECK-NEXT: [[INC5_3]] = add nuw nsw i32 [[INC5_SINK9]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[INC_SINK8:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[INC_SINK8_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[INC_SINK8_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[INC_SINK8_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[INC_SINK8:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[INC_SINK8_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[INC_SINK8_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[INC_SINK8_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[INC_SINK8]], 1
; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[INC_SINK8_1]], 1
; CHECK-NEXT: [[INC_2]] = add nuw nsw i32 [[INC_SINK8_2]], 1
; CHECK-NEXT: [[INC_3]] = add nuw nsw i32 [[INC_SINK8_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp ne i32 [[INC_3]], 7
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_INNER]], label [[FOR_LATCH]]
-; CHECK: for.latch:
-; CHECK-NEXT: br i1 false, label [[FOR_OUTER]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: for.end.unr-lcssa.loopexit:
-; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, [[FOR_LATCH]] ]
-; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, [[FOR_LATCH]] ]
-; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_END_UNR_LCSSA]]
-; CHECK: for.end.unr-lcssa:
-; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[DOTLCSSA_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[INC_LCSSA_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], [[ENTRY]] ], [ [[P0_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: br i1 true, label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], [[FOR_OUTER_EPIL]] ], [ 2, [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_INNER]], label %[[FOR_LATCH]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_END_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ]
+; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ]
+; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_END_UNR_LCSSA]]
+; CHECK: [[FOR_END_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[INC_EPIL]] = add nuw nsw i32 [[INC_SINK8_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp ne i32 [[INC_EPIL]], 7
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_INNER_EPIL]], label [[FOR_LATCH_EPIL:%.*]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], [[FOR_LATCH_EPIL]] ]
-; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ 7, [[FOR_LATCH_EPIL]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_INNER_EPIL]], label %[[FOR_LATCH_EPIL:.*]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ]
+; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ]
; CHECK-NEXT: ret i32 0
;
entry:
@@ -693,159 +697,160 @@ for.end:
; Has a positive dependency between two stores. Still valid.
; The negative dependecy is in unroll-and-jam-disabled.ll
define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0
-; CHECK-NEXT: [[CMP128:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-LABEL: define void @test7(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0
+; CHECK-NEXT: [[CMP128:%.*]] = icmp ne i32 [[I]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP128]], [[CMP]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.preheader:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_PREHEADER_NEW:%.*]]
-; CHECK: for.preheader.new:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_PREHEADER_NEW]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_PREHEADER_NEW]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9]] = add i32 [[L1]], [[SUM]]
; CHECK-NEXT: [[ADD10]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX7_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX7_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]]
; CHECK-NEXT: [[ADD10_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX7_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX7_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]]
; CHECK-NEXT: [[ADD10_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX7_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX7_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]]
; CHECK-NEXT: [[ADD10_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
-; CHECK: for.end.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
-; CHECK: for.end.loopexit.unr-lcssa:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX7_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]]
; CHECK-NEXT: [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK: for.outer.epil.1:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]]
+; CHECK: [[FOR_OUTER_EPIL_1]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]]
-; CHECK: for.inner.epil.1:
-; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]]
+; CHECK: [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX7_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]]
; CHECK-NEXT: [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
-; CHECK: for.latch.epil.1:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
+; CHECK: [[FOR_LATCH_EPIL_1]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.outer.epil.2:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_OUTER_EPIL_2]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3
; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]]
-; CHECK: for.inner.epil.2:
-; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK: [[FOR_INNER_EPIL_2]]:
+; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX7_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]]
; CHECK-NEXT: [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
-; CHECK: for.latch.epil.2:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
+; CHECK: [[FOR_LATCH_EPIL_2]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -888,166 +893,167 @@ for.end:
; Same as test7 with an extra outer loop nest
define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0
-; CHECK-NEXT: [[CMP336:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-LABEL: define void @test8(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0
+; CHECK-NEXT: [[CMP336:%.*]] = icmp eq i32 [[I]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP336]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]]
-; CHECK: for.preheader:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]]
+; CHECK: [[FOR_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
-; CHECK-NEXT: br label [[FOR_OUTEST:%.*]]
-; CHECK: for.outest:
-; CHECK-NEXT: [[X_038:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_CLEANUP:%.*]] ], [ 0, [[FOR_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_OUTEST:.*]]
+; CHECK: [[FOR_OUTEST]]:
+; CHECK-NEXT: [[X_038:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_CLEANUP:.*]] ], [ 0, %[[FOR_PREHEADER]] ]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_CLEANUP_UNR_LCSSA:%.*]], label [[FOR_OUTEST_NEW:%.*]]
-; CHECK: for.outest.new:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_CLEANUP_UNR_LCSSA:.*]], label %[[FOR_OUTEST_NEW:.*]]
+; CHECK: [[FOR_OUTEST_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTEST_NEW]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTEST_NEW]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9]] = add i32 [[L1]], [[SUM]]
; CHECK-NEXT: [[ADD10]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]]
; CHECK-NEXT: [[ADD10_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]]
; CHECK-NEXT: [[ADD10_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]]
; CHECK-NEXT: [[ADD10_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK: for.cleanup.unr-lcssa.loopexit:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_CLEANUP_UNR_LCSSA]]
-; CHECK: for.cleanup.unr-lcssa:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTEST]] ], [ [[I_UNR_PH]], [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_CLEANUP_UNR_LCSSA]]
+; CHECK: [[FOR_CLEANUP_UNR_LCSSA]]:
+; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTEST]] ], [ [[I_UNR_PH]], %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_CLEANUP]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_CLEANUP]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]]
; CHECK-NEXT: [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA:%.*]]
-; CHECK: for.outer.epil.1:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA:.*]]
+; CHECK: [[FOR_OUTER_EPIL_1]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]]
-; CHECK: for.inner.epil.1:
-; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]]
+; CHECK: [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]]
; CHECK-NEXT: [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
-; CHECK: for.latch.epil.1:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
+; CHECK: [[FOR_LATCH_EPIL_1]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA]]
-; CHECK: for.outer.epil.2:
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA]]
+; CHECK: [[FOR_OUTER_EPIL_2]]:
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3
; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]]
-; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]]
-; CHECK: for.inner.epil.2:
-; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK: [[FOR_INNER_EPIL_2]]:
+; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]]
; CHECK-NEXT: [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
-; CHECK: for.latch.epil.2:
-; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_CLEANUP_EPILOG_LCSSA]]
-; CHECK: for.cleanup.epilog-lcssa:
-; CHECK-NEXT: br label [[FOR_CLEANUP]]
-; CHECK: for.cleanup:
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
+; CHECK: [[FOR_LATCH_EPIL_2]]:
+; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_CLEANUP_EPILOG_LCSSA]]
+; CHECK: [[FOR_CLEANUP_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_CLEANUP]]
+; CHECK: [[FOR_CLEANUP]]:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[X_038]], 1
; CHECK-NEXT: [[EXITCOND41:%.*]] = icmp eq i32 [[INC]], 5
-; CHECK-NEXT: br i1 [[EXITCOND41]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_OUTEST]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: br i1 [[EXITCOND41]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_OUTEST]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -1099,144 +1105,145 @@ for.end:
; Same as test1 with tbaa, not noalias
define void @test9(i32 %I, i32 %E, ptr nocapture %A, ptr nocapture readonly %B) #0 {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0
-; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-LABEL: define void @test9(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0
+; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.outer.preheader:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
-; CHECK: for.outer.preheader.new:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_OUTER_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
-; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
-; CHECK: for.outer:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_OUTER:.*]]
+; CHECK: [[FOR_OUTER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
; CHECK-NEXT: [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[ADD8_1:%.*]] = add nuw nsw i32 [[I]], 2
; CHECK-NEXT: [[ADD8_2:%.*]] = add nuw nsw i32 [[I]], 3
; CHECK-NEXT: [[ADD8_3]] = add nuw i32 [[I]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_INNER:%.*]]
-; CHECK: for.inner:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[J]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: br label %[[FOR_INNER:.*]]
+; CHECK: [[FOR_INNER]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[SHORT_TBAA10:![0-9]+]]
; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT: [[ADD]] = add i32 [[SEXT]], [[SUM]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_1:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT: [[ADD_1]] = add i32 [[SEXT_1]], [[SUM_1]]
; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_2:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-NEXT: [[ADD_2]] = add i32 [[SEXT_2]], [[SUM_2]]
; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_3:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT: [[ADD_3]] = add i32 [[SEXT_3]], [[SUM_3]]
; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
-; CHECK: for.latch:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
+; CHECK: [[FOR_LATCH]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_1]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK: for.end.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
-; CHECK: for.end.loopexit.unr-lcssa:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.outer.epil.preheader:
-; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]]
-; CHECK: for.outer.epil:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]]
-; CHECK: for.inner.epil:
-; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
-; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK: [[FOR_OUTER_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]]
+; CHECK: [[FOR_INNER_EPIL]]:
+; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_EPIL:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[SEXT_EPIL]], [[SUM_EPIL]]
; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
-; CHECK: for.latch.epil:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
+; CHECK: [[FOR_LATCH_EPIL]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK: for.outer.epil.1:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]]
-; CHECK: for.inner.epil.1:
-; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]]
+; CHECK: [[FOR_OUTER_EPIL_1]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]]
+; CHECK: [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_EPIL_1:%.*]] = sext i16 [[TMP7]] to i32
; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[SEXT_EPIL_1]], [[SUM_EPIL_1]]
; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
-; CHECK: for.latch.epil.1:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
+; CHECK: [[FOR_LATCH_EPIL_1]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[ADD8_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.outer.epil.2:
-; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]]
-; CHECK: for.inner.epil.2:
-; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_OUTER_EPIL_2]]:
+; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK: [[FOR_INNER_EPIL_2]]:
+; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[SHORT_TBAA10]]
; CHECK-NEXT: [[SEXT_EPIL_2:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[SEXT_EPIL_2]], [[SUM_EPIL_2]]
; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
-; CHECK: for.latch.epil.2:
-; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
+; CHECK: [[FOR_LATCH_EPIL_2]]:
+; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL_1]]
-; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK: for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
+; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -1284,114 +1291,115 @@ for.end:
@g = common global %struct.a zeroinitializer, align 8
@c = common global [1 x i8] zeroinitializer, align 1
define signext i16 @test10(i32 %k) #0 {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define signext i16 @test10(
+; CHECK-SAME: i32 [[K:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @c, align 1
; CHECK-NEXT: [[TOBOOL9:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[K:%.*]], 0
-; CHECK-NEXT: br i1 false, label [[FOR_END26_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[STOREMERGE82:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INC25_3:%.*]], [[FOR_INC24:%.*]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC24]] ]
+; CHECK-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[K]], 0
+; CHECK-NEXT: br i1 false, label %[[FOR_END26_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[STOREMERGE82:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[INC25_3:%.*]], %[[FOR_INC24:.*]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_INC24]] ]
; CHECK-NEXT: [[INC25_3]] = add nuw nsw i64 [[STOREMERGE82]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i64 [[NITER]], 4
-; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
-; CHECK: for.body2:
-; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC:%.*]], [[FOR_INC21_3:%.*]] ]
-; CHECK-NEXT: [[STOREMERGE_14:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC_1:%.*]], [[FOR_INC21_3]] ]
-; CHECK-NEXT: [[STOREMERGE_25:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC_2:%.*]], [[FOR_INC21_3]] ]
-; CHECK-NEXT: [[STOREMERGE_36:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC_3:%.*]], [[FOR_INC21_3]] ]
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT:%.*]], label [[FOR_BODY2_SPLIT2:%.*]]
-; CHECK: for.body2.split2:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21:%.*]], label [[FOR_INC21_IF:%.*]]
-; CHECK: for.body2.split:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21]], label [[FOR_INC21_THEN:%.*]]
-; CHECK: for.inc21.if:
-; CHECK-NEXT: br label [[FOR_INC21]]
-; CHECK: for.inc21.then:
-; CHECK-NEXT: br label [[FOR_INC21]]
-; CHECK: for.inc21:
+; CHECK-NEXT: br label %[[FOR_BODY2:.*]]
+; CHECK: [[FOR_BODY2]]:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC:%.*]], %[[FOR_INC21_3:.*]] ]
+; CHECK-NEXT: [[STOREMERGE_14:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_1:%.*]], %[[FOR_INC21_3]] ]
+; CHECK-NEXT: [[STOREMERGE_25:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_2:%.*]], %[[FOR_INC21_3]] ]
+; CHECK-NEXT: [[STOREMERGE_36:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_3:%.*]], %[[FOR_INC21_3]] ]
+; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT:.*]], label %[[FOR_BODY2_SPLIT2:.*]]
+; CHECK: [[FOR_BODY2_SPLIT2]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21:.*]], label %[[FOR_INC21_IF:.*]]
+; CHECK: [[FOR_BODY2_SPLIT]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21]], label %[[FOR_INC21_THEN:.*]]
+; CHECK: [[FOR_INC21_IF]]:
+; CHECK-NEXT: br label %[[FOR_INC21]]
+; CHECK: [[FOR_INC21_THEN]]:
+; CHECK-NEXT: br label %[[FOR_INC21]]
+; CHECK: [[FOR_INC21]]:
; CHECK-NEXT: [[DEC]] = add nsw i64 [[STOREMERGE]], -1
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_1:%.*]], label [[FOR_BODY2_SPLIT2_1:%.*]]
-; CHECK: for.inc24:
-; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], [[FOR_INC21_3]] ]
-; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END26_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
-; CHECK: for.end26.unr-lcssa.loopexit:
-; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, [[FOR_INC24]] ]
-; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], [[FOR_INC24]] ]
-; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, [[FOR_INC24]] ]
-; CHECK-NEXT: br label [[FOR_END26_UNR_LCSSA]]
-; CHECK: for.end26.unr-lcssa:
-; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[DEC_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, [[ENTRY]] ], [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: br i1 true, label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END26:%.*]]
-; CHECK: for.body.epil.preheader:
-; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
-; CHECK: for.body.epil:
-; CHECK-NEXT: br label [[FOR_BODY2_EPIL:%.*]]
-; CHECK: for.body2.epil:
-; CHECK-NEXT: [[STOREMERGE_EPIL:%.*]] = phi i64 [ 4, [[FOR_BODY_EPIL]] ], [ [[DEC_EPIL:%.*]], [[FOR_INC21_EPIL:%.*]] ]
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_EPIL:%.*]], label [[FOR_BODY2_SPLIT2_EPIL:%.*]]
-; CHECK: for.body2.split2.epil:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_EPIL]], label [[FOR_INC21_IF_EPIL:%.*]]
-; CHECK: for.inc21.if.epil:
-; CHECK-NEXT: br label [[FOR_INC21_EPIL]]
-; CHECK: for.body2.split.epil:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_EPIL]], label [[FOR_INC21_THEN_EPIL:%.*]]
-; CHECK: for.inc21.then.epil:
-; CHECK-NEXT: br label [[FOR_INC21_EPIL]]
-; CHECK: for.inc21.epil:
-; CHECK-NEXT: [[STOREMERGE_4_EPIL:%.*]] = phi i64 [ 0, [[FOR_INC21_IF_EPIL]] ], [ 0, [[FOR_INC21_THEN_EPIL]] ], [ 4, [[FOR_BODY2_SPLIT2_EPIL]] ], [ 4, [[FOR_BODY2_SPLIT_EPIL]] ]
+; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_1:.*]], label %[[FOR_BODY2_SPLIT2_1:.*]]
+; CHECK: [[FOR_INC24]]:
+; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ]
+; CHECK-NEXT: br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END26_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ]
+; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ]
+; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ]
+; CHECK-NEXT: br label %[[FOR_END26_UNR_LCSSA]]
+; CHECK: [[FOR_END26_UNR_LCSSA]]:
+; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[DEC_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 true, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_END26:.*]]
+; CHECK: [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY_EPIL:.*]]
+; CHECK: [[FOR_BODY_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_BODY2_EPIL:.*]]
+; CHECK: [[FOR_BODY2_EPIL]]:
+; CHECK-NEXT: [[STOREMERGE_EPIL:%.*]] = phi i64 [ 4, %[[FOR_BODY_EPIL]] ], [ [[DEC_EPIL:%.*]], %[[FOR_INC21_EPIL:.*]] ]
+; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_EPIL:.*]], label %[[FOR_BODY2_SPLIT2_EPIL:.*]]
+; CHECK: [[FOR_BODY2_SPLIT2_EPIL]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_EPIL]], label %[[FOR_INC21_IF_EPIL:.*]]
+; CHECK: [[FOR_INC21_IF_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_INC21_EPIL]]
+; CHECK: [[FOR_BODY2_SPLIT_EPIL]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_EPIL]], label %[[FOR_INC21_THEN_EPIL:.*]]
+; CHECK: [[FOR_INC21_THEN_EPIL]]:
+; CHECK-NEXT: br label %[[FOR_INC21_EPIL]]
+; CHECK: [[FOR_INC21_EPIL]]:
+; CHECK-NEXT: [[STOREMERGE_4_EPIL:%.*]] = phi i64 [ 0, %[[FOR_INC21_IF_EPIL]] ], [ 0, %[[FOR_INC21_THEN_EPIL]] ], [ 4, %[[FOR_BODY2_SPLIT2_EPIL]] ], [ 4, %[[FOR_BODY2_SPLIT_EPIL]] ]
; CHECK-NEXT: [[DEC_EPIL]] = add nsw i64 [[STOREMERGE_EPIL]], -1
; CHECK-NEXT: [[TOBOOL_EPIL:%.*]] = icmp eq i64 [[DEC_EPIL]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_EPIL]], label [[FOR_INC24_EPIL:%.*]], label [[FOR_BODY2_EPIL]]
-; CHECK: for.inc24.epil:
-; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], [[FOR_INC21_EPIL]] ]
-; CHECK-NEXT: br label [[FOR_END26]]
-; CHECK: for.end26:
-; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ 0, [[FOR_INC24_EPIL]] ]
-; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], [[FOR_INC24_EPIL]] ]
-; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ 0, [[FOR_INC24_EPIL]] ]
+; CHECK-NEXT: br i1 [[TOBOOL_EPIL]], label %[[FOR_INC24_EPIL:.*]], label %[[FOR_BODY2_EPIL]]
+; CHECK: [[FOR_INC24_EPIL]]:
+; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], %[[FOR_INC21_EPIL]] ]
+; CHECK-NEXT: br label %[[FOR_END26]]
+; CHECK: [[FOR_END26]]:
+; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
+; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ]
+; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
; CHECK-NEXT: store i64 [[DEC_LCSSA_LCSSA]], ptr @g, align 8
; CHECK-NEXT: ret i16 0
-; CHECK: for.body2.split2.1:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_1:%.*]], label [[FOR_INC21_IF_1:%.*]]
-; CHECK: for.inc21.if.1:
-; CHECK-NEXT: br label [[FOR_INC21_1]]
-; CHECK: for.body2.split.1:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_1]], label [[FOR_INC21_THEN_1:%.*]]
-; CHECK: for.inc21.then.1:
-; CHECK-NEXT: br label [[FOR_INC21_1]]
-; CHECK: for.inc21.1:
+; CHECK: [[FOR_BODY2_SPLIT2_1]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_1:.*]], label %[[FOR_INC21_IF_1:.*]]
+; CHECK: [[FOR_INC21_IF_1]]:
+; CHECK-NEXT: br label %[[FOR_INC21_1]]
+; CHECK: [[FOR_BODY2_SPLIT_1]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_1]], label %[[FOR_INC21_THEN_1:.*]]
+; CHECK: [[FOR_INC21_THEN_1]]:
+; CHECK-NEXT: br label %[[FOR_INC21_1]]
+; CHECK: [[FOR_INC21_1]]:
; CHECK-NEXT: [[DEC_1]] = add nsw i64 [[STOREMERGE_14]], -1
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_2:%.*]], label [[FOR_BODY2_SPLIT2_2:%.*]]
-; CHECK: for.body2.split2.2:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_2:%.*]], label [[FOR_INC21_IF_2:%.*]]
-; CHECK: for.inc21.if.2:
-; CHECK-NEXT: br label [[FOR_INC21_2]]
-; CHECK: for.body2.split.2:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_2]], label [[FOR_INC21_THEN_2:%.*]]
-; CHECK: for.inc21.then.2:
-; CHECK-NEXT: br label [[FOR_INC21_2]]
-; CHECK: for.inc21.2:
+; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_2:.*]], label %[[FOR_BODY2_SPLIT2_2:.*]]
+; CHECK: [[FOR_BODY2_SPLIT2_2]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_2:.*]], label %[[FOR_INC21_IF_2:.*]]
+; CHECK: [[FOR_INC21_IF_2]]:
+; CHECK-NEXT: br label %[[FOR_INC21_2]]
+; CHECK: [[FOR_BODY2_SPLIT_2]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_2]], label %[[FOR_INC21_THEN_2:.*]]
+; CHECK: [[FOR_INC21_THEN_2]]:
+; CHECK-NEXT: br label %[[FOR_INC21_2]]
+; CHECK: [[FOR_INC21_2]]:
; CHECK-NEXT: [[DEC_2]] = add nsw i64 [[STOREMERGE_25]], -1
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_3:%.*]], label [[FOR_BODY2_SPLIT2_3:%.*]]
-; CHECK: for.body2.split2.3:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_3]], label [[FOR_INC21_IF_3:%.*]]
-; CHECK: for.inc21.if.3:
-; CHECK-NEXT: br label [[FOR_INC21_3]]
-; CHECK: for.body2.split.3:
-; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_3]], label [[FOR_INC21_THEN_3:%.*]]
-; CHECK: for.inc21.then.3:
-; CHECK-NEXT: br label [[FOR_INC21_3]]
-; CHECK: for.inc21.3:
-; CHECK-NEXT: [[STOREMERGE_4_3]] = phi i64 [ 0, [[FOR_INC21_IF_3]] ], [ 0, [[FOR_INC21_THEN_3]] ], [ 4, [[FOR_BODY2_SPLIT2_3]] ], [ 4, [[FOR_BODY2_SPLIT_3]] ]
+; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_3:.*]], label %[[FOR_BODY2_SPLIT2_3:.*]]
+; CHECK: [[FOR_BODY2_SPLIT2_3]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_3]], label %[[FOR_INC21_IF_3:.*]]
+; CHECK: [[FOR_INC21_IF_3]]:
+; CHECK-NEXT: br label %[[FOR_INC21_3]]
+; CHECK: [[FOR_BODY2_SPLIT_3]]:
+; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_3]], label %[[FOR_INC21_THEN_3:.*]]
+; CHECK: [[FOR_INC21_THEN_3]]:
+; CHECK-NEXT: br label %[[FOR_INC21_3]]
+; CHECK: [[FOR_INC21_3]]:
+; CHECK-NEXT: [[STOREMERGE_4_3]] = phi i64 [ 0, %[[FOR_INC21_IF_3]] ], [ 0, %[[FOR_INC21_THEN_3]] ], [ 4, %[[FOR_BODY2_SPLIT2_3]] ], [ 4, %[[FOR_BODY2_SPLIT_3]] ]
; CHECK-NEXT: [[DEC_3]] = add nsw i64 [[STOREMERGE_36]], -1
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i64 [[DEC_3]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_3]], label [[FOR_INC24]], label [[FOR_BODY2]]
+; CHECK-NEXT: br i1 [[TOBOOL_3]], label %[[FOR_INC24]], label %[[FOR_BODY2]]
;
entry:
%0 = load i8, ptr @c, align 1
@@ -1451,3 +1459,19 @@ for.end26:
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{!10, !10, i64 0}
!10 = !{!"short", !7, i64 0}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]}
+; CHECK: [[SHORT_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+; CHECK: [[META11]] = !{!"short", [[META2]], i64 0}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
index 346f1cb..097f05d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -1,14 +1,11 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
-; CHECK: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK: LV(REG): VF = vscale x 4
+; CHECK-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
index b25bc48..8bbfdf3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -1,20 +1,19 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
-; ZVFH: LV(REG): Found max usage: 2 item
-; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFH-LABEL: add
+; ZVFH: LV(REG): VF = vscale x 4
+; ZVFH-NEXT: LV(REG): Found max usage: 2 item
+; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; ZVFHMIN: LV(REG): Found max usage: 2 item
-; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFHMIN-LABEL: add
+; ZVFHMIN: LV(REG): VF = vscale x 4
+; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
new file mode 100644
index 0000000..42f12ec
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=true -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=false -S < %s | FileCheck %s --check-prefix=NO-REG-PRESSURE-CHECK
+
+define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 4 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 4)
+; CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP5]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP13]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 4 x i64> [[VEC_IND3]], splat (i64 1)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP15]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 4 x i64> [[VEC_IND4]], splat (i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP17]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 12 x i8> @llvm.vector.interleave3.nxv12i8(<vscale x 4 x i8> [[WIDE_MASKED_GATHER]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER10]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0(<vscale x 12 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 12 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 4 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 4 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1
+; CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1
+; CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+; NO-REG-PRESSURE-CHECK-LABEL: define void @f(
+; NO-REG-PRESSURE-CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-REG-PRESSURE-CHECK-NEXT: [[ENTRY:.*:]]
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-REG-PRESSURE-CHECK: [[VECTOR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
+; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]:
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[EXIT:.*]]
+; NO-REG-PRESSURE-CHECK: [[SCALAR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[LOOP:.*]]
+; NO-REG-PRESSURE-CHECK: [[LOOP]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; NO-REG-PRESSURE-CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; NO-REG-PRESSURE-CHECK: [[EXIT]]:
+; NO-REG-PRESSURE-CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %wide.iv.0 = phi i64 [ 0, %entry ], [ %wide.iv.0.next, %loop ]
+ %wide.iv.1 = phi i64 [ 0, %entry ], [ %wide.iv.1.next, %loop ]
+ %wide.iv.2 = phi i64 [ 0, %entry ], [ %wide.iv.2.next, %loop ]
+
+ %wide.iv.0.sub = sub i64 %wide.iv.0, 1
+ %a.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.0.sub
+ %a = load i8, ptr %a.gep0
+
+ %wide.iv.1.sub = sub i64 %wide.iv.1, 1
+ %b.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.1.sub
+ %b = load i8, ptr %b.gep0
+
+ %wide.iv.2.sub = sub i64 %wide.iv.2, 1
+ %c.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.2.sub
+ %c = load i8, ptr %c.gep0
+
+ %iv.mul = mul i64 %iv, 3
+ %base = getelementptr i8, ptr %p1, i64 %iv.mul
+
+ %a.gep1 = getelementptr i8, ptr %base, i8 0
+ store i8 %a, ptr %a.gep1
+
+ %b.gep1 = getelementptr i8, ptr %base, i8 1
+ store i8 %b, ptr %b.gep1
+
+ %c.gep1 = getelementptr i8, ptr %base, i8 2
+ store i8 %c, ptr %c.gep1
+
+ %iv.next = add i64 %iv, 1
+ %wide.iv.0.next = add i64 %wide.iv.0, 2
+ %wide.iv.1.next = add i64 %wide.iv.1, 3
+ %wide.iv.2.next = add i64 %wide.iv.2, 4
+ %done = icmp eq i64 %iv, 1024
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 116ccc9..99139da 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -5,50 +5,54 @@
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=1 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=1 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=2 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=2 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=4 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=4 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=8 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=8 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
-
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
+; CHECK-SCALAR-LABEL: add
; CHECK-SCALAR: LV(REG): VF = 1
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 2 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL1: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL1-LABEL: add
+; CHECK-LMUL1: LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL2: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL2-LABEL: add
+; CHECK-LMUL2: LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL4: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL4-LABEL: add
+; CHECK-LMUL4: LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL8: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL8-LABEL: add
+; CHECK-LMUL8: LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
@@ -76,22 +80,26 @@ for.body:
}
define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
-; CHECK-LABEL: goo
+; CHECK-SCALAR-LABEL: goo
; CHECK-SCALAR: LV(REG): VF = 1
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
-; CHECK-LMUL1: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
-; CHECK-LMUL2: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
-; CHECK-LMUL4: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
-; CHECK-LMUL8: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL1: LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL2: LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL4: LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL8: LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
entry:
%cmp3 = icmp sgt i32 %n, 0
br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll
index 8e3af54..4cff875 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
; This is a bugpoint reduction of a test from PR43582:
@@ -12,31 +12,32 @@ target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
target triple = "x86_64-w64-windows-gnu"
define void @cff_index_load_offsets(i1 %cond, i8 %x, ptr %p) #0 {
-; CHECK-LABEL: @cff_index_load_offsets(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: br label [[FOR_BODY68:%.*]]
-; CHECK: for.body68:
-; CHECK-NEXT: [[P_359:%.*]] = phi ptr [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ null, [[IF_THEN]] ]
-; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-LABEL: define void @cff_index_load_offsets(
+; CHECK-SAME: i1 [[COND:%.*]], i8 [[X:%.*]], ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br label %[[FOR_BODY68:.*]]
+; CHECK: [[FOR_BODY68]]:
+; CHECK-NEXT: [[P_359:%.*]] = phi ptr [ [[ADD_PTR86:%.*]], %[[FOR_BODY68]] ], [ null, %[[IF_THEN]] ]
+; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32
; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA1:![0-9]+]]
; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16
; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr undef, align 1, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr undef, align 1, !tbaa [[CHAR_TBAA1]]
; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8
; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]]
; CHECK-NEXT: [[CONV81:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[OR83:%.*]] = or i32 [[OR79]], [[CONV81]]
-; CHECK-NEXT: store i32 [[OR83]], ptr undef, align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: store i32 [[OR83]], ptr undef, align 4, !tbaa [[LONG_TBAA4:![0-9]+]]
; CHECK-NEXT: [[ADD_PTR86]] = getelementptr inbounds i8, ptr [[P_359]], i64 4
; CHECK-NEXT: [[CMP66:%.*]] = icmp ult ptr [[ADD_PTR86]], undef
-; CHECK-NEXT: br i1 [[CMP66]], label [[FOR_BODY68]], label [[SW_EPILOG:%.*]]
-; CHECK: sw.epilog:
+; CHECK-NEXT: br i1 [[CMP66]], label %[[FOR_BODY68]], label %[[SW_EPILOG:.*]]
+; CHECK: [[SW_EPILOG]]:
; CHECK-NEXT: unreachable
-; CHECK: Exit:
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -80,3 +81,10 @@ attributes #0 = { "use-soft-float"="false" }
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"long", !2, i64 0}
+;.
+; CHECK: [[CHAR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"long", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 994cd33..8a48f99 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
@@ -7,35 +7,36 @@ target triple = "x86_64-unknown-linux-gnu"
@jlplt_ijl_alloc_array_1d_10294_got = external dso_local local_unnamed_addr global ptr
define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) local_unnamed_addr #0 {
-; CHECK-LABEL: @japi1_vect_42283(
-; CHECK-NEXT: iter.check:
-; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1:%.*]] to i64
+; CHECK-LABEL: define ptr addrspace(10) @japi1_vect_42283(
+; CHECK-SAME: ptr readonly captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = load atomic ptr, ptr @jlplt_ijl_alloc_array_1d_10294_got unordered, align 8
; CHECK-NEXT: [[TMP4:%.*]] = tail call ptr addrspace(10) [[TMP3]](ptr addrspace(10) null, i64 0)
-; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(10), ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(10), ptr [[TMP0]], align 8, !tbaa [[JTBAA_VALUE_TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(10) [[TMP4]] to ptr addrspace(11)
-; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(13), ptr addrspace(11) [[TMP6]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(13), ptr addrspace(11) [[TMP6]], align 8, !tbaa [[JTBAA_ARRAYPTR_TBAA5:![0-9]+]]
; CHECK-NEXT: [[DOTELT:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 0
-; CHECK-NEXT: [[DOTUNPACK:%.*]] = load ptr addrspace(10), ptr addrspace(10) [[DOTELT]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: [[DOTUNPACK:%.*]] = load ptr addrspace(10), ptr addrspace(10) [[DOTELT]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8:![0-9]+]]
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1
-; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]]
+; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8]]
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[TOP:%.*]]
-; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[TOP:.*]]
+; CHECK: [[TOP]]:
; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16
-; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4)
@@ -43,31 +44,31 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0
-; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10:![0-9]+]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10:![0-9]+]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4)
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK1]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[L44:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[L44:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP8]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]]
-; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP]] ]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[SCALAR_PH]], !prof [[PROF15:![0-9]+]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[TOP]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP8]], 4
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
@@ -77,34 +78,34 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT: br label [[L26:%.*]]
-; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT14:%.*]], [[L26]] ]
-; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[SCALAR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[L26]] ]
+; CHECK-NEXT: br label %[[L26:.*]]
+; CHECK: [[L26]]:
+; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDEX_NEXT14:%.*]], %[[L26]] ]
+; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[SCALAR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], %[[L26]] ]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 0
-; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> [[TMP28]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> [[TMP28]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 1
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> [[TMP29]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> [[TMP29]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 4
; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND8]], splat (i64 4)
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[L26]], !llvm.loop [[LOOP15:![0-9]+]]
-; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br i1 [[TMP30]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[L26]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[CMP_N15]], label [[L44]], label [[VEC_EPILOG_SCALAR_PH]]
-; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: br label [[L27:%.*]]
-; CHECK: L26:
-; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], [[L27]] ]
+; CHECK-NEXT: br i1 [[CMP_N15]], label %[[L44]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[L27:.*]]
+; CHECK: [[L27]]:
+; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], %[[L27]] ]
; CHECK-NEXT: [[DOTREPACK:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 0
-; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[TBAA10]]
+; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[DOTREPACK4:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 1
-; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[DOTREPACK4]], align 8, !tbaa [[TBAA10]]
+; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[DOTREPACK4]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]]
; CHECK-NEXT: [[TMP27]] = add i64 [[VALUE_PHI5]], 1
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[VALUE_PHI5]], [[TMP2]]
-; CHECK-NEXT: br i1 [[DOTNOT]], label [[L44]], label [[L27]], !llvm.loop [[LOOP16:![0-9]+]]
-; CHECK: L44:
+; CHECK-NEXT: br i1 [[DOTNOT]], label %[[L44]], label %[[L27]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[L44]]:
; CHECK-NEXT: ret ptr addrspace(10) null
;
top:
@@ -146,3 +147,23 @@ L44: ; preds = %L26
!9 = !{!"jtbaa_immut", !1, i64 0}
!10 = !{!11, !11, i64 0}
!11 = !{!"jtbaa_arraybuf", !2, i64 0}
+;.
+; CHECK: [[JTBAA_VALUE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"jtbaa_value", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"jtbaa_data", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"jtbaa", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"jtbaa"}
+; CHECK: [[JTBAA_ARRAYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"jtbaa_arrayptr", [[META7:![0-9]+]], i64 0}
+; CHECK: [[META7]] = !{!"jtbaa_array", [[META3]], i64 0}
+; CHECK: [[JTBAA_IMMUT_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK: [[META9]] = !{!"jtbaa_immut", [[META1]], i64 0}
+; CHECK: [[JTBAA_ARRAYBUF_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+; CHECK: [[META11]] = !{!"jtbaa_arraybuf", [[META2]], i64 0}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]], [[META14:![0-9]+]]}
+; CHECK: [[META13]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META14]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META13]], [[META14]]}
+; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META13]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index 35f61b2..050243f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=loop-vectorize -S -o - | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -o - | FileCheck --check-prefix=MAX-BW %s
@@ -10,21 +10,22 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readonly uwtable
define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_unnamed_addr #0 {
-; CHECK-LABEL: @matrix_row_col(
-; CHECK-NEXT: iter.check:
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64
-; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ]
+; CHECK-LABEL: define i32 @matrix_row_col(
+; CHECK-SAME: ptr readonly captures(none) [[DATA:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]]
+; CHECK: [[VECTOR_PH1]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
@@ -57,14 +58,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[TBAA1:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
@@ -97,14 +98,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0
; CHECK-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1
; CHECK-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2
@@ -113,14 +114,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5
; CHECK-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7
-; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1
; CHECK-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2
@@ -129,14 +130,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5
; CHECK-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6
; CHECK-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7
-; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0
; CHECK-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1
; CHECK-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2
@@ -145,14 +146,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5
; CHECK-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6
; CHECK-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7
-; CHECK-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0
; CHECK-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1
; CHECK-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2
@@ -175,37 +176,37 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; CHECK-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 [[TMP148]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]]
; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]]
; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]])
-; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
-; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
-; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
+; CHECK-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
; CHECK-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
-; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0
; CHECK-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1
; CHECK-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2
@@ -215,46 +216,47 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]]
; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4
; CHECK-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100
-; CHECK-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]])
-; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
-; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
-; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY1:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ]
-; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ]
+; CHECK: [[FOR_BODY1]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ]
+; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
;
-; MAX-BW-LABEL: @matrix_row_col(
-; MAX-BW-NEXT: iter.check:
-; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64
-; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64
-; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; MAX-BW: vector.main.loop.iter.check:
-; MAX-BW-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
-; MAX-BW: vector.ph:
-; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]]
-; MAX-BW: vector.body:
-; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ]
+; MAX-BW-LABEL: define i32 @matrix_row_col(
+; MAX-BW-SAME: ptr readonly captures(none) [[DATA:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; MAX-BW-NEXT: [[ITER_CHECK:.*]]:
+; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
+; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64
+; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; MAX-BW: [[VECTOR_PH]]:
+; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]]
+; MAX-BW: [[VECTOR_PH1]]:
+; MAX-BW-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX-BW: [[VECTOR_BODY]]:
+; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ]
+; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
+; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
+; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
@@ -287,14 +289,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
+; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8
; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16
; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24
-; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[TBAA1:![0-9]+]]
-; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
+; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
@@ -327,14 +329,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]]
-; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0
; MAX-BW-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1
; MAX-BW-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2
@@ -343,14 +345,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5
; MAX-BW-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6
; MAX-BW-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7
-; MAX-BW-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0
; MAX-BW-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1
; MAX-BW-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2
@@ -359,14 +361,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5
; MAX-BW-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6
; MAX-BW-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7
-; MAX-BW-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0
; MAX-BW-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1
; MAX-BW-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2
@@ -375,14 +377,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5
; MAX-BW-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6
; MAX-BW-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7
-; MAX-BW-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0
; MAX-BW-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1
; MAX-BW-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2
@@ -405,37 +407,37 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]]
; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; MAX-BW-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; MAX-BW-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
-; MAX-BW: middle.block:
+; MAX-BW-NEXT: br i1 [[TMP148]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; MAX-BW: [[MIDDLE_BLOCK]]:
; MAX-BW-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]]
; MAX-BW-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]]
; MAX-BW-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]]
; MAX-BW-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]])
-; MAX-BW-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
-; MAX-BW: vec.epilog.iter.check:
-; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
-; MAX-BW: vec.epilog.ph:
-; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
-; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
+; MAX-BW-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; MAX-BW: [[VEC_EPILOG_ITER_CHECK]]:
+; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]]
+; MAX-BW: [[VEC_EPILOG_PH]]:
+; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
+; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
; MAX-BW-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
-; MAX-BW-NEXT: br label [[FOR_BODY:%.*]]
-; MAX-BW: vec.epilog.vector.body:
-; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ]
-; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ]
+; MAX-BW-NEXT: br label %[[FOR_BODY:.*]]
+; MAX-BW: [[FOR_BODY]]:
+; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ]
+; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[FOR_BODY]] ]
; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
-; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
-; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]]
-; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0
; MAX-BW-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1
; MAX-BW-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2
@@ -445,30 +447,30 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]]
; MAX-BW-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4
; MAX-BW-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100
-; MAX-BW-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; MAX-BW: vec.epilog.middle.block:
+; MAX-BW-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; MAX-BW: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; MAX-BW-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]])
-; MAX-BW-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
-; MAX-BW: vec.epilog.scalar.ph:
-; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
-; MAX-BW-NEXT: br label [[FOR_BODY1:%.*]]
-; MAX-BW: for.cond.cleanup:
-; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]]
+; MAX-BW: [[SCALAR_PH]]:
+; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; MAX-BW-NEXT: br label %[[FOR_BODY1:.*]]
+; MAX-BW: [[FOR_COND_CLEANUP]]:
+; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; MAX-BW-NEXT: ret i32 [[ADD7_LCSSA]]
-; MAX-BW: for.body:
-; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ]
-; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ]
+; MAX-BW: [[FOR_BODY1]]:
+; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ]
+; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ]
; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
-; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
-; MAX-BW-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]]
+; MAX-BW-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]]
; MAX-BW-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
; MAX-BW-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
; MAX-BW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; MAX-BW-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; MAX-BW-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
+; MAX-BW-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
;
entry:
%idxprom = sext i32 %i to i64
@@ -496,13 +498,14 @@ entry:
}
define void @test(ptr %A, ptr noalias %B) #0 {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
@@ -520,13 +523,13 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 0
; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 0
; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP8]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP18:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]]
; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[TMP18]] to <8 x i8>
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP8]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP9]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP10]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP11]]
@@ -552,13 +555,13 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK-NEXT: store i8 [[TMP35]], ptr [[TMP27]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
-; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0
; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]]
@@ -571,17 +574,18 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
;
-; MAX-BW-LABEL: @test(
-; MAX-BW-NEXT: entry:
-; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; MAX-BW: vector.ph:
-; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]]
-; MAX-BW: vector.body:
-; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; MAX-BW-LABEL: define void @test(
+; MAX-BW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; MAX-BW-NEXT: [[ENTRY:.*:]]
+; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; MAX-BW: [[VECTOR_PH]]:
+; MAX-BW-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX-BW: [[VECTOR_BODY]]:
+; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
@@ -615,13 +619,13 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP13]], 0
; MAX-BW-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[TMP14]], 0
; MAX-BW-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[TMP15]], 0
-; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP16]]
+; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP16]]
; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4
; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
; MAX-BW-NEXT: [[TMP34:%.*]] = add <16 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]]
; MAX-BW-NEXT: [[TMP35:%.*]] = trunc <16 x i32> [[TMP34]] to <16 x i8>
-; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP16]]
+; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP16]]
; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP17]]
; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP18]]
; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP19]]
@@ -671,13 +675,13 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW-NEXT: store i8 [[TMP67]], ptr [[TMP51]], align 1
; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; MAX-BW-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
-; MAX-BW-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; MAX-BW: middle.block:
-; MAX-BW-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
-; MAX-BW: scalar.ph:
-; MAX-BW-NEXT: br label [[FOR_BODY:%.*]]
-; MAX-BW: for.body:
-; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; MAX-BW-NEXT: br i1 [[TMP68]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAX-BW: [[MIDDLE_BLOCK]]:
+; MAX-BW-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
+; MAX-BW: [[SCALAR_PH]]:
+; MAX-BW-NEXT: br label %[[FOR_BODY:.*]]
+; MAX-BW: [[FOR_BODY]]:
+; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; MAX-BW-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0
; MAX-BW-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1
; MAX-BW-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]]
@@ -690,8 +694,8 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1
; MAX-BW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2
; MAX-BW-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024
-; MAX-BW-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]]
-; MAX-BW: for.cond.cleanup:
+; MAX-BW-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]]
+; MAX-BW: [[FOR_COND_CLEANUP]]:
; MAX-BW-NEXT: ret void
;
entry:
@@ -733,3 +737,28 @@ attributes #0 = { "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+sse,+s
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
+; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]]}
+;.
+; MAX-BW: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; MAX-BW: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; MAX-BW: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; MAX-BW: [[META4]] = !{!"Simple C/C++ TBAA"}
+; MAX-BW: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
+; MAX-BW: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAX-BW: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAX-BW: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]}
+; MAX-BW: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]}
+; MAX-BW: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]}
+; MAX-BW: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
index e629560..f86ad8f 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s
@postscale = external constant [64 x float]
@@ -11,11 +11,11 @@ define void @test(ptr %data) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
-; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[SHORT_TBAA4:![0-9]+]]
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[END:.*]]
@@ -25,10 +25,10 @@ define void @test(ptr %data) {
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]]
-; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]])
; CHECK-NEXT: [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16
-; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[TBAA4]]
+; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[SHORT_TBAA4]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 8ae404c..4226c5d 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -602,5 +602,40 @@ exit:
ret void
}
+;; ICE was caused by assert for the load used in the uncountable exit condition
+;; being guaranteed to execute.
+@e = external addrspace(21) global [4 x i8]
+define void @crash_conditional_load_for_uncountable_exit() {
+; CHECK-LABEL: LV: Checking a loop in 'crash_conditional_load_for_uncountable_exit'
+; CHECK: LV: Not vectorizing: Load for uncountable exit not guaranteed to execute.
+entry:
+ br label %cont
+
+handler.out_of_bounds:
+ unreachable
+
+cont:
+ %h.06 = phi i64 [ 0, %entry ], [ %inc, %a.exit ]
+ %arrayidx = getelementptr i8, ptr addrspace(21) @e, i64 %h.06
+ br i1 false, label %cont1, label %handler.type_mismatch
+
+handler.type_mismatch:
+ unreachable
+
+cont1:
+ %0 = load i8, ptr addrspace(21) %arrayidx, align 1
+ store i16 0, ptr null, align 2
+ %cmp.not.i.i = icmp eq i8 %0, 0
+ br i1 %cmp.not.i.i, label %a.exit, label %if.then.i.i
+
+if.then.i.i:
+ unreachable
+
+a.exit:
+ %inc = add i64 %h.06, 1
+ br i1 true, label %handler.out_of_bounds, label %cont
+}
+
+
declare void @init_mem(ptr, i64);
declare i64 @get_an_unknown_offset();
diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll
index 54779ed..e487eac 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s
; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=2 | FileCheck --check-prefix=INTERLEAVE %s
@@ -18,12 +18,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp oge <2 x double> [[TMP3]], splat (double 1.000000e+01)
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]]
; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP6]] to <2 x float>, !fpmath [[META3]]
-; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -37,12 +37,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) {
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[L_1]], 9.900000e+01, !fpmath [[META3]]
; CHECK-NEXT: [[C:%.*]] = fcmp oge double [[ADD]], 1.000000e+01
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], double [[L_1]], double 0.000000e+00, !fpmath [[META3]]
; CHECK-NEXT: [[T:%.*]] = fptrunc double [[SEL]] to float, !fpmath [[META3]]
-; CHECK-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -63,8 +63,8 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) {
; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 2
-; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]]
; INTERLEAVE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], splat (double 9.900000e+01), !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp oge <2 x double> [[TMP4]], splat (double 1.000000e+01)
@@ -74,8 +74,8 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) {
; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float>, !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP10:%.*]] = fptrunc <2 x double> [[TMP8]] to <2 x float>, !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2
-; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
-; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]]
+; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -89,12 +89,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) {
; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; INTERLEAVE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; INTERLEAVE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; INTERLEAVE-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[ADD:%.*]] = fadd double [[L_1]], 9.900000e+01, !fpmath [[META3]]
; INTERLEAVE-NEXT: [[C:%.*]] = fcmp oge double [[ADD]], 1.000000e+01
; INTERLEAVE-NEXT: [[SEL:%.*]] = select i1 [[C]], double [[L_1]], double 0.000000e+00, !fpmath [[META3]]
; INTERLEAVE-NEXT: [[T:%.*]] = fptrunc double [[SEL]] to float, !fpmath [[META3]]
-; INTERLEAVE-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]]
; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -133,7 +133,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 4
@@ -147,7 +147,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) {
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]]
; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
@@ -167,8 +167,8 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2
-; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]])
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD1]])
; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
@@ -185,7 +185,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE: [[LOOP]]:
; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
-; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]]
+; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]]
; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]]
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
@@ -223,7 +223,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8
@@ -237,7 +237,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) {
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8
@@ -257,8 +257,8 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2
-; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]]
-; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
@@ -275,7 +275,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE: [[LOOP]]:
; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]]
-; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]]
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8
@@ -403,7 +403,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8
@@ -417,7 +417,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) {
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8
@@ -437,8 +437,8 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2
-; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]]
-; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]]
+; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]]
; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
@@ -455,7 +455,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) {
; INTERLEAVE: [[LOOP]]:
; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]]
-; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]]
+; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]]
; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]]
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8
@@ -608,7 +608,7 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_
!3 = !{!"omnipotent char", !2, i64 0}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0}
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0}
; CHECK: [[META1]] = !{!"omnipotent char", [[META2]]}
; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
; CHECK: [[META3]] = !{float 2.500000e+00}
@@ -628,7 +628,7 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]], [[META6]]}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META6]], [[META5]]}
;.
-; INTERLEAVE: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0}
+; INTERLEAVE: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0}
; INTERLEAVE: [[META1]] = !{!"omnipotent char", [[META2]]}
; INTERLEAVE: [[META2]] = !{!"Simple C/C++ TBAA"}
; INTERLEAVE: [[META3]] = !{float 2.500000e+00}
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d2c53f4..a633dfe 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
-; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
-; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
-; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 97f4542..87447b6 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
+; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
@@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) {
; IC1-NEXT: [[ENTRY:.*]]:
; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
; IC1: [[LOOP_HEADER]]:
-; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC1-NEXT: i64 120, label %[[IF_THEN]]
+; IC1-NEXT: i64 120, label %[[IF_THEN1]]
; IC1-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC1-NEXT: ]
-; IC1: [[IF_THEN]]:
+; IC1: [[IF_THEN1]]:
; IC1-NEXT: br label %[[LOOP_HEADER]]
-; IC1: [[IF_THEN1:.*:]]
+; IC1: [[IF_THEN:.*:]]
; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC1-NEXT: store i64 42, ptr [[GEP]], align 1
; IC1-NEXT: unreachable
; IC1: [[LOOP_LATCH]]:
; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC1: [[EXIT]]:
; IC1-NEXT: ret void
;
@@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) {
; IC2-NEXT: [[ENTRY:.*]]:
; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
; IC2: [[LOOP_HEADER]]:
-; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC2-NEXT: i64 120, label %[[IF_THEN]]
+; IC2-NEXT: i64 120, label %[[IF_THEN1]]
; IC2-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC2-NEXT: ]
-; IC2: [[IF_THEN]]:
+; IC2: [[IF_THEN1]]:
; IC2-NEXT: br label %[[LOOP_HEADER]]
-; IC2: [[IF_THEN1:.*:]]
+; IC2: [[IF_THEN:.*:]]
; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC2-NEXT: store i64 42, ptr [[GEP]], align 1
; IC2-NEXT: unreachable
; IC2: [[LOOP_LATCH]]:
; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC2: [[EXIT]]:
; IC2-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 3d05ee7..cf85f26 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -22,7 +22,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
-; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
+; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
+; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
@@ -36,7 +40,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@@ -53,7 +57,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@@ -70,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll
index 16ad4bf..9f77bbf 100644
--- a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll
+++ b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=loop-versioning -S -o - | FileCheck %s
; This test case used to end like this:
@@ -22,48 +22,48 @@
define void @f1() {
; CHECK-LABEL: define void @f1() {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[T0:%.*]] = load ptr, ptr @c, align 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[T0]], i64 2
-; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
-; CHECK: for.body.lver.check:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK:.*]]
+; CHECK: [[FOR_BODY_LVER_CHECK]]:
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[T0]], getelementptr inbounds nuw (i8, ptr @b, i64 2)
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr @b, [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
-; CHECK: for.body.ph.lver.orig:
-; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
-; CHECK: for.body.lver.orig:
-; CHECK-NEXT: [[T1_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT: [[T2_LVER_ORIG:%.*]] = load i16, ptr @b, align 1, !tbaa [[TBAA2:![0-9]+]]
-; CHECK-NEXT: store i16 [[T2_LVER_ORIG]], ptr [[T0]], align 1, !tbaa [[TBAA2]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[FOR_BODY_PH_LVER_ORIG:.*]], label %[[FOR_BODY_PH:.*]]
+; CHECK: [[FOR_BODY_PH_LVER_ORIG]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_ORIG:.*]]
+; CHECK: [[FOR_BODY_LVER_ORIG]]:
+; CHECK-NEXT: [[T1_LVER_ORIG:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], %[[FOR_BODY_LVER_ORIG]] ]
+; CHECK-NEXT: [[T2_LVER_ORIG:%.*]] = load i16, ptr @b, align 1, !tbaa [[LONG_LONG_TBAA2:![0-9]+]]
+; CHECK-NEXT: store i16 [[T2_LVER_ORIG]], ptr [[T0]], align 1, !tbaa [[LONG_LONG_TBAA2]]
; CHECK-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[T1_LVER_ORIG]], 1
; CHECK-NEXT: [[CMP_LVER_ORIG:%.*]] = icmp ult i64 [[INC_LVER_ORIG]], 3
-; CHECK-NEXT: br i1 [[CMP_LVER_ORIG]], label [[FOR_BODY_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.body.ph:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[T1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[T2:%.*]] = load i16, ptr @b, align 1, !tbaa [[TBAA2]], !alias.scope [[META6:![0-9]+]]
-; CHECK-NEXT: store i16 [[T2]], ptr [[T0]], align 1, !tbaa [[TBAA2]], !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-NEXT: br i1 [[CMP_LVER_ORIG]], label %[[FOR_BODY_LVER_ORIG]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_BODY_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[T1:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[T2:%.*]] = load i16, ptr @b, align 1, !tbaa [[LONG_LONG_TBAA2]], !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: store i16 [[T2]], ptr [[T0]], align 1, !tbaa [[LONG_LONG_TBAA2]], !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[T1]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 3
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT1:%.*]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: [[T2_LVER_PH:%.*]] = phi i16 [ [[T2_LVER_ORIG]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit1:
-; CHECK-NEXT: [[T2_LVER_PH2:%.*]] = phi i16 [ [[T2]], [[FOR_BODY]] ]
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[T2_LVER:%.*]] = phi i16 [ [[T2_LVER_PH]], [[FOR_END_LOOPEXIT]] ], [ [[T2_LVER_PH2]], [[FOR_END_LOOPEXIT1]] ]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT1:.*]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[T2_LVER_PH:%.*]] = phi i16 [ [[T2_LVER_ORIG]], %[[FOR_BODY_LVER_ORIG]] ]
+; CHECK-NEXT: br label %[[FOR_END:.*]]
+; CHECK: [[FOR_END_LOOPEXIT1]]:
+; CHECK-NEXT: [[T2_LVER_PH2:%.*]] = phi i16 [ [[T2]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[T2_LVER:%.*]] = phi i16 [ [[T2_LVER_PH]], %[[FOR_END_LOOPEXIT]] ], [ [[T2_LVER_PH2]], %[[FOR_END_LOOPEXIT1]] ]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[T2_LVER]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND_BACKEDGE:%.*]], label [[IF_THEN:%.*]]
-; CHECK: for.cond.backedge:
-; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK]]
-; CHECK: if.then:
-; CHECK-NEXT: store i16 [[T2_LVER]], ptr @a, align 1, !tbaa [[TBAA2]]
-; CHECK-NEXT: br label [[FOR_COND_BACKEDGE]]
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[FOR_COND_BACKEDGE:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[FOR_COND_BACKEDGE]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: store i16 [[T2_LVER]], ptr @a, align 1, !tbaa [[LONG_LONG_TBAA2]]
+; CHECK-NEXT: br label %[[FOR_COND_BACKEDGE]]
;
entry:
%t0 = load ptr, ptr @c, align 1
@@ -101,3 +101,14 @@ if.then: ; preds = %for.end
!3 = !{!"long long", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[LONG_LONG_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+; CHECK: [[META3]] = !{!"long long", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
+;.
diff --git a/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll b/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll
index 33e37c9..1dfdf09 100644
--- a/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll
+++ b/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=mldst-motion -S %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
@@ -10,7 +10,7 @@ define void @perserve_common_metadata(i1 %c, ptr %dst, ptr %min) {
; CHECK-NEXT: [[GEP_DST_16:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 16
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
-; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[GEP_DST_24:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 24
@@ -18,7 +18,7 @@ define void @perserve_common_metadata(i1 %c, ptr %dst, ptr %min) {
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[DOTSINK:%.*]] = phi ptr [ [[DST]], %[[THEN]] ], [ null, %[[ELSE]] ]
-; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[TBAA4:![0-9]+]], !alias.scope [[META6:![0-9]+]], !noalias [[META6]], !llvm.access.group [[ACC_GRP9:![0-9]+]]
+; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[LONG_TBAA4:![0-9]+]], !alias.scope [[META6:![0-9]+]], !noalias [[META6]], !llvm.access.group [[ACC_GRP9:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -47,7 +47,7 @@ define void @clear_different_metadata(i1 %c, ptr %dst, ptr %min) {
; CHECK-NEXT: [[GEP_DST_16:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 16
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
-; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[_FOOPTR_TBAA10:![0-9]+]]
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[GEP_DST_24:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 24
@@ -55,7 +55,7 @@ define void @clear_different_metadata(i1 %c, ptr %dst, ptr %min) {
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[DOTSINK:%.*]] = phi ptr [ [[DST]], %[[THEN]] ], [ null, %[[ELSE]] ]
-; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8
+; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[CHAR_TBAA13:![0-9]+]], !alias.scope [[META6]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
entry:
@@ -93,17 +93,18 @@ return:
!13 = distinct !{}
!14 = distinct !{}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0, i64 0}
+; CHECK: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0, i64 0}
; CHECK: [[META5]] = !{!"long", [[META2]]}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]]}
; CHECK: [[ACC_GRP9]] = distinct !{}
-; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 0}
+; CHECK: [[_FOOPTR_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 0}
; CHECK: [[META11]] = !{!"p2 _Foo", [[META12:![0-9]+]]}
; CHECK: [[META12]] = !{!"any pointer", [[META2]], i64 0}
+; CHECK: [[CHAR_TBAA13]] = !{[[META2]], [[META2]], i64 0}
;.
diff --git a/llvm/test/Transforms/NewGVN/memory-handling.ll b/llvm/test/Transforms/NewGVN/memory-handling.ll
index bf07edf..f83d145 100644
--- a/llvm/test/Transforms/NewGVN/memory-handling.ll
+++ b/llvm/test/Transforms/NewGVN/memory-handling.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
;; This test is really dependent on propagating a lot of memory info around, but in the end, not
;; screwing up a single add.
; RUN: opt < %s -passes=newgvn -S | FileCheck %s
@@ -26,114 +26,114 @@ define void @BuildMask(ptr nocapture readonly) local_unnamed_addr #0 {
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false)
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainMask, i8 0, i64 16, i1 false)
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainSign, i8 0, i64 16, i1 false)
-; CHECK-NEXT: br label [[DOTSINK_SPLIT:%.*]]
-; CHECK: .sink.split:
-; CHECK-NEXT: [[DOT0:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP3:%.*]], [[TMP14:%.*]] ]
-; CHECK-NEXT: [[DOTSINK:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP22:%.*]], [[TMP14]] ]
-; CHECK-NEXT: store i32 [[DOTSINK]], ptr @cchPhraseLength, align 4, !tbaa [[TBAA1:![0-9]+]]
-; CHECK-NEXT: br label [[TMP2:%.*]]
-; CHECK: 2:
-; CHECK-NEXT: [[DOT1:%.*]] = phi ptr [ [[DOT0]], [[DOTSINK_SPLIT]] ], [ [[TMP3]], [[TMP6:%.*]] ]
+; CHECK-NEXT: br label %[[DOTSINK_SPLIT:.*]]
+; CHECK: [[_SINK_SPLIT:.*:]]
+; CHECK-NEXT: [[DOT0:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP3:%.*]], %[[TMP14:.*]] ]
+; CHECK-NEXT: [[DOTSINK:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP22:%.*]], %[[TMP14]] ]
+; CHECK-NEXT: store i32 [[DOTSINK]], ptr @cchPhraseLength, align 4, !tbaa [[INT_TBAA1:![0-9]+]]
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[DOT1:%.*]] = phi ptr [ [[DOT0]], %[[DOTSINK_SPLIT]] ], [ [[TMP3]], %[[TMP6:.*]] ]
; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, ptr [[DOT1]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOT1]], align 1, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOT1]], align 1, !tbaa [[CHAR_TBAA5:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[DOTPREHEADER_PREHEADER:%.*]], label [[TMP6]]
-; CHECK: .preheader.preheader:
-; CHECK-NEXT: br label [[DOTPREHEADER:%.*]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[DOTPREHEADER_PREHEADER:.*]], label %[[TMP6]]
+; CHECK: [[_PREHEADER_PREHEADER:.*:]]
+; CHECK-NEXT: br [[DOTPREHEADER:label %.*]]
+; CHECK: [[TMP6]]:
; CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__ctype_b_loc() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = sext i8 [[TMP4]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP10]], align 2, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP10]], align 2, !tbaa [[SHORT_TBAA8:![0-9]+]]
; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 1024
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i16 [[TMP12]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label [[TMP2]], label [[TMP14]]
-; CHECK: 14:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[BB2]], label %[[TMP14]]
+; CHECK: [[TMP14]]:
; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP4]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = tail call i32 @tolower(i32 [[TMP15]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], -97
; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[TMP18]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 16, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 16, !tbaa [[INT_TBAA10:![0-9]+]]
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 1
-; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 16, !tbaa [[TBAA10]]
+; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 16, !tbaa [[INT_TBAA10]]
; CHECK-NEXT: [[TMP22]] = add nsw i32 [[DOTSINK]], 1
-; CHECK-NEXT: br label [[DOTSINK_SPLIT]]
-; CHECK: .preheader:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[DOTPREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP57:%.*]] ]
-; CHECK-NEXT: [[DOT04961:%.*]] = phi i32 [ [[DOT2:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ]
-; CHECK-NEXT: [[DOT05160:%.*]] = phi i32 [ [[DOT253:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[DOTSINK_SPLIT]]
+; CHECK: [[_PREHEADER:.*:]]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[DOTPREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[TMP57:.*]] ]
+; CHECK-NEXT: [[DOT04961:%.*]] = phi i32 [ [[DOT2:%.*]], %[[TMP57]] ], [ 0, %[[DOTPREHEADER_PREHEADER]] ]
+; CHECK-NEXT: [[DOT05160:%.*]] = phi i32 [ [[DOT253:%.*]], %[[TMP57]] ], [ 0, %[[DOTPREHEADER_PREHEADER]] ]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 0
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA10]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[INT_TBAA10]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP24]], 0
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [26 x i32], ptr @auGlobalFrequency, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: br i1 [[TMP25]], label [[TMP27:%.*]], label [[TMP28:%.*]]
-; CHECK: 27:
-; CHECK-NEXT: store i32 -1, ptr [[TMP26]], align 4, !tbaa [[TBAA1]]
-; CHECK-NEXT: br label [[TMP57]]
-; CHECK: 28:
-; CHECK-NEXT: store i32 0, ptr [[TMP26]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: br i1 [[TMP25]], label %[[TMP27:.*]], label %[[TMP28:.*]]
+; CHECK: [[TMP27]]:
+; CHECK-NEXT: store i32 -1, ptr [[TMP26]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: br label %[[TMP57]]
+; CHECK: [[TMP28]]:
+; CHECK-NEXT: store i32 0, ptr [[TMP26]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT: br i1 false, label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
-; CHECK: .lr.ph.preheader:
-; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
-; CHECK: .lr.ph:
-; CHECK-NEXT: [[DOT04658:%.*]] = phi i64 [ [[TMP31:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ]
-; CHECK-NEXT: [[DOT04857:%.*]] = phi i32 [ [[TMP30:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ]
+; CHECK-NEXT: br i1 false, label %[[DOT_CRIT_EDGE:.*]], label %[[DOTLR_PH_PREHEADER:.*]]
+; CHECK: [[_LR_PH_PREHEADER:.*:]]
+; CHECK-NEXT: br label %[[DOTLR_PH:.*]]
+; CHECK: [[_LR_PH:.*:]]
+; CHECK-NEXT: [[DOT04658:%.*]] = phi i64 [ [[TMP31:%.*]], %[[DOTLR_PH]] ], [ 1, %[[DOTLR_PH_PREHEADER]] ]
+; CHECK-NEXT: [[DOT04857:%.*]] = phi i32 [ [[TMP30:%.*]], %[[DOTLR_PH]] ], [ 1, %[[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[TMP30]] = add nuw nsw i32 [[DOT04857]], 1
; CHECK-NEXT: [[TMP31]] = shl i64 [[DOT04658]], 1
; CHECK-NEXT: [[TMP32:%.*]] = icmp ult i64 [[TMP29]], [[TMP31]]
-; CHECK-NEXT: br i1 [[TMP32]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[DOTLR_PH]]
-; CHECK: ._crit_edge.loopexit:
-; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
-; CHECK: ._crit_edge:
-; CHECK-NEXT: [[DOT048_LCSSA:%.*]] = phi i32 [ poison, [[TMP28]] ], [ [[TMP30]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
-; CHECK-NEXT: [[DOT046_LCSSA:%.*]] = phi i64 [ poison, [[TMP28]] ], [ [[TMP31]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 [[TMP32]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[DOTLR_PH]]
+; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
+; CHECK-NEXT: br label %[[DOT_CRIT_EDGE]]
+; CHECK: [[__CRIT_EDGE:.*:]]
+; CHECK-NEXT: [[DOT048_LCSSA:%.*]] = phi i32 [ poison, %[[TMP28]] ], [ [[TMP30]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-NEXT: [[DOT046_LCSSA:%.*]] = phi i64 [ poison, %[[TMP28]] ], [ [[TMP31]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: [[TMP33:%.*]] = add nsw i32 [[DOT048_LCSSA]], [[DOT04961]]
; CHECK-NEXT: [[TMP34:%.*]] = icmp ugt i32 [[TMP33]], 64
-; CHECK-NEXT: br i1 [[TMP34]], label [[TMP35:%.*]], label [[TMP39:%.*]]
-; CHECK: 35:
+; CHECK-NEXT: br i1 [[TMP34]], label %[[TMP35:.*]], label %[[TMP39:.*]]
+; CHECK: [[TMP35]]:
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[DOT05160]], 1
; CHECK-NEXT: [[TMP37:%.*]] = icmp ugt i32 [[TMP36]], 1
-; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39]]
-; CHECK: 38:
+; CHECK-NEXT: br i1 [[TMP37]], label %[[TMP38:.*]], label %[[TMP39]]
+; CHECK: [[TMP38]]:
; CHECK-NEXT: tail call void @Fatal(ptr @.str.7, i32 0)
-; CHECK-NEXT: br label [[TMP39]]
-; CHECK: 39:
-; CHECK-NEXT: [[DOT152:%.*]] = phi i32 [ [[DOT05160]], [[DOT_CRIT_EDGE]] ], [ [[TMP36]], [[TMP38]] ], [ [[TMP36]], [[TMP35]] ]
-; CHECK-NEXT: [[DOT150:%.*]] = phi i32 [ [[DOT04961]], [[DOT_CRIT_EDGE]] ], [ 0, [[TMP38]] ], [ 0, [[TMP35]] ]
+; CHECK-NEXT: br label %[[TMP39]]
+; CHECK: [[TMP39]]:
+; CHECK-NEXT: [[DOT152:%.*]] = phi i32 [ [[DOT05160]], %[[DOT_CRIT_EDGE]] ], [ [[TMP36]], %[[TMP38]] ], [ [[TMP36]], %[[TMP35]] ]
+; CHECK-NEXT: [[DOT150:%.*]] = phi i32 [ [[DOT04961]], %[[DOT_CRIT_EDGE]] ], [ 0, %[[TMP38]] ], [ 0, %[[TMP35]] ]
; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[DOT046_LCSSA]], 4294967295
; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i32
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 2
-; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 8, !tbaa [[TBAA12:![0-9]+]]
+; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 8, !tbaa [[INT_TBAA12:![0-9]+]]
; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[DOT150]] to i64
; CHECK-NEXT: [[DOT046_:%.*]] = shl i64 [[DOT046_LCSSA]], [[TMP43]]
; CHECK-NEXT: [[TMP44:%.*]] = zext i32 [[DOT152]] to i64
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainSign, i64 0, i64 [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP45]], align 8, !tbaa [[TBAA13:![0-9]+]]
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP45]], align 8, !tbaa [[LONG_TBAA13:![0-9]+]]
; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[DOT046_]]
-; CHECK-NEXT: store i64 [[TMP47]], ptr [[TMP45]], align 8, !tbaa [[TBAA13]]
-; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA10]]
+; CHECK-NEXT: store i64 [[TMP47]], ptr [[TMP45]], align 8, !tbaa [[LONG_TBAA13]]
+; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[INT_TBAA10]]
; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
; CHECK-NEXT: [[TMP50:%.*]] = shl i64 [[TMP49]], [[TMP43]]
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainMask, i64 0, i64 [[TMP44]]
-; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP51]], align 8, !tbaa [[TBAA13]]
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP51]], align 8, !tbaa [[LONG_TBAA13]]
; CHECK-NEXT: [[TMP53:%.*]] = or i64 [[TMP50]], [[TMP52]]
-; CHECK-NEXT: store i64 [[TMP53]], ptr [[TMP51]], align 8, !tbaa [[TBAA13]]
+; CHECK-NEXT: store i64 [[TMP53]], ptr [[TMP51]], align 8, !tbaa [[LONG_TBAA13]]
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 1
-; CHECK-NEXT: store i32 [[DOT150]], ptr [[TMP54]], align 4, !tbaa [[TBAA15:![0-9]+]]
+; CHECK-NEXT: store i32 [[DOT150]], ptr [[TMP54]], align 4, !tbaa [[INT_TBAA15:![0-9]+]]
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 3
-; CHECK-NEXT: store i32 [[DOT152]], ptr [[TMP55]], align 4, !tbaa [[TBAA16:![0-9]+]]
+; CHECK-NEXT: store i32 [[DOT152]], ptr [[TMP55]], align 4, !tbaa [[INT_TBAA16:![0-9]+]]
; CHECK-NEXT: [[TMP56:%.*]] = add nsw i32 [[DOT150]], [[DOT048_LCSSA]]
-; CHECK-NEXT: br label [[TMP57]]
-; CHECK: 57:
-; CHECK-NEXT: [[DOT253]] = phi i32 [ [[DOT05160]], [[TMP27]] ], [ [[DOT152]], [[TMP39]] ]
-; CHECK-NEXT: [[DOT2]] = phi i32 [ [[DOT04961]], [[TMP27]] ], [ [[TMP56]], [[TMP39]] ]
+; CHECK-NEXT: br label %[[TMP57]]
+; CHECK: [[TMP57]]:
+; CHECK-NEXT: [[DOT253]] = phi i32 [ [[DOT05160]], %[[TMP27]] ], [ [[DOT152]], %[[TMP39]] ]
+; CHECK-NEXT: [[DOT2]] = phi i32 [ [[DOT04961]], %[[TMP27]] ], [ [[TMP56]], %[[TMP39]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 26
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOTPREHEADER]], label [[TMP58:%.*]]
-; CHECK: 58:
+; CHECK-NEXT: br i1 [[EXITCOND]], [[DOTPREHEADER]], label %[[BB58:.*]]
+; CHECK: [[BB58]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false)
@@ -309,20 +309,20 @@ attributes #5 = { nounwind readonly }
!15 = !{!11, !2, i64 4}
!16 = !{!11, !2, i64 12}
;.
-; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
-; CHECK: [[TBAA5]] = !{[[META3]], [[META3]], i64 0}
-; CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+; CHECK: [[CHAR_TBAA5]] = !{[[META3]], [[META3]], i64 0}
+; CHECK: [[ANYPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
; CHECK: [[META7]] = !{!"any pointer", [[META3]], i64 0}
-; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK: [[SHORT_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
; CHECK: [[META9]] = !{!"short", [[META3]], i64 0}
-; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META11]] = !{!"", [[META2]], i64 0, [[META2]], i64 4, [[META2]], i64 8, [[META2]], i64 12}
-; CHECK: [[TBAA12]] = !{[[META11]], [[META2]], i64 8}
-; CHECK: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
+; CHECK: [[INT_TBAA12]] = !{[[META11]], [[META2]], i64 8}
+; CHECK: [[LONG_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
; CHECK: [[META14]] = !{!"long", [[META3]], i64 0}
-; CHECK: [[TBAA15]] = !{[[META11]], [[META2]], i64 4}
-; CHECK: [[TBAA16]] = !{[[META11]], [[META2]], i64 12}
+; CHECK: [[INT_TBAA15]] = !{[[META11]], [[META2]], i64 4}
+; CHECK: [[INT_TBAA16]] = !{[[META11]], [[META2]], i64 12}
;.
diff --git a/llvm/test/Transforms/NewGVN/pr31501.ll b/llvm/test/Transforms/NewGVN/pr31501.ll
index 18bfcd1..353c693 100644
--- a/llvm/test/Transforms/NewGVN/pr31501.ll
+++ b/llvm/test/Transforms/NewGVN/pr31501.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=newgvn -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -50,32 +50,33 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: norecurse nounwind ssp uwtable
define weak_odr hidden ptr @quux(ptr %arg, ptr %arg1) local_unnamed_addr #0 align 2 {
-; CHECK-LABEL: @quux(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_BARNEY:%.*]], ptr [[ARG:%.*]], i64 0, i32 3, i32 0, i32 0, i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa [[TBAA2:![0-9]+]]
+; CHECK-LABEL: define weak_odr hidden ptr @quux(
+; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_BARNEY:%.*]], ptr [[ARG]], i64 0, i32 3, i32 0, i32 0, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa [[ANYPTR_TBAA2:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_BARNEY]], ptr [[ARG]], i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA7:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq ptr [[TMP3]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[BB21:%.*]], label [[BB8:%.*]]
-; CHECK: bb8:
-; CHECK-NEXT: br label [[BB11:%.*]]
-; CHECK: bb9:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[BB21:.*]], label %[[BB8:.*]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: br label %[[BB11:.*]]
+; CHECK: [[BB9:.*]]:
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP18:%.*]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[BB19:%.*]], label [[BB11]]
-; CHECK: bb11:
-; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], [[BB9:%.*]] ], [ undef, [[BB8]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP18]], [[BB9]] ], [ [[TMP3]], [[BB8]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa [[TBAA8:![0-9]+]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq ptr [[TMP15]], [[ARG1:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[BB19:.*]], label %[[BB11]]
+; CHECK: [[BB11]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], %[[BB9]] ], [ undef, %[[BB8]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP18]], %[[BB9]] ], [ [[TMP3]], %[[BB8]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq ptr [[TMP15]], [[ARG1]]
; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], ptr [[TMP13]], ptr [[TMP12]]
; CHECK-NEXT: [[TMP18]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP13]], i64 1
-; CHECK-NEXT: br i1 [[TMP16]], label [[BB19]], label [[BB9]]
-; CHECK: bb19:
-; CHECK-NEXT: [[TMP20:%.*]] = phi ptr [ null, [[BB9]] ], [ [[TMP17]], [[BB11]] ]
-; CHECK-NEXT: br label [[BB21]]
-; CHECK: bb21:
-; CHECK-NEXT: [[TMP22:%.*]] = phi ptr [ null, [[BB:%.*]] ], [ [[TMP20]], [[BB19]] ]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[BB19]], label %[[BB9]]
+; CHECK: [[BB19]]:
+; CHECK-NEXT: [[TMP20:%.*]] = phi ptr [ null, %[[BB9]] ], [ [[TMP17]], %[[BB11]] ]
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[TMP22:%.*]] = phi ptr [ null, %[[BB]] ], [ [[TMP20]], %[[BB19]] ]
; CHECK-NEXT: ret ptr [[TMP22]]
;
bb:
@@ -128,3 +129,15 @@ attributes #0 = { norecurse nounwind ssp uwtable "correctly-rounded-divide-sqrt-
!9 = !{!"_ZTSN4llvm9RecordValE", !4, i64 0, !10, i64 8, !4, i64 16}
!10 = !{!"_ZTSN4llvm14PointerIntPairIPNS_5RecTyELj1EbNS_21PointerLikeTypeTraitsIS2_EENS_18PointerIntPairInfoIS2_Lj1ES4_EEEE", !11, i64 0}
!11 = !{!"long", !5, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"_ZTSN4llvm15SmallVectorBaseE", [[META4]], i64 0, [[META4]], i64 8, [[META4]], i64 16}
+; CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA7]] = !{[[META3]], [[META4]], i64 8}
+; CHECK: [[ANYPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META9]] = !{!"_ZTSN4llvm9RecordValE", [[META4]], i64 0, [[META10:![0-9]+]], i64 8, [[META4]], i64 16}
+; CHECK: [[META10]] = !{!"_ZTSN4llvm14PointerIntPairIPNS_5RecTyELj1EbNS_21PointerLikeTypeTraitsIS2_EENS_18PointerIntPairInfoIS2_Lj1ES4_EEEE", [[META11:![0-9]+]], i64 0}
+; CHECK: [[META11]] = !{!"long", [[META5]], i64 0}
+;.
diff --git a/llvm/test/Transforms/NewGVN/pr33305.ll b/llvm/test/Transforms/NewGVN/pr33305.ll
index 3a19f61..e742f14 100644
--- a/llvm/test/Transforms/NewGVN/pr33305.ll
+++ b/llvm/test/Transforms/NewGVN/pr33305.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=newgvn -S %s | FileCheck %s
; Ensure we do not incorrect do phi of ops
source_filename = "/Users/dannyb/sources/llvm-clean/debug-build/pr33305.c"
@@ -17,68 +17,69 @@ target triple = "x86_64-apple-macosx10.12.0"
; Function Attrs: nounwind optsize ssp uwtable
define i32 @main() local_unnamed_addr #0 {
-; CHECK-LABEL: @main(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa [[INT_TBAA3:![0-9]+]]
; CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[DOTPR_I]], 1
-; CHECK-NEXT: br i1 [[CMP13_I]], label [[FOR_COND1_PREHEADER_LR_PH_I:%.*]], label [[ENTRY_FOR_END9_I_CRIT_EDGE:%.*]]
-; CHECK: entry.for.end9.i_crit_edge:
-; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: br label [[FOR_END9_I:%.*]]
-; CHECK: for.cond1.preheader.lr.ph.i:
-; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_I:%.*]]
-; CHECK: for.cond1.preheader.i:
-; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], [[FOR_INC7_I:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, [[FOR_INC7_I]] ]
-; CHECK-NEXT: br label [[FOR_BODY3_I:%.*]]
-; CHECK: for.body3.i:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FOR_COND1_PREHEADER_I]] ], [ true, [[LOR_END_I:%.*]] ]
-; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], [[LOR_END_I]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND1_PREHEADER_I]] ], [ 0, [[LOR_END_I]] ]
+; CHECK-NEXT: br i1 [[CMP13_I]], label %[[FOR_COND1_PREHEADER_LR_PH_I:.*]], label %[[ENTRY_FOR_END9_I_CRIT_EDGE:.*]]
+; CHECK: [[ENTRY_FOR_END9_I_CRIT_EDGE]]:
+; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: br label %[[FOR_END9_I:.*]]
+; CHECK: [[FOR_COND1_PREHEADER_LR_PH_I]]:
+; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: br label %[[FOR_COND1_PREHEADER_I:.*]]
+; CHECK: [[FOR_COND1_PREHEADER_I]]:
+; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], %[[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], %[[FOR_INC7_I:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], %[[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, %[[FOR_INC7_I]] ]
+; CHECK-NEXT: br label %[[FOR_BODY3_I:.*]]
+; CHECK: [[FOR_BODY3_I]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FOR_COND1_PREHEADER_I]] ], [ true, %[[LOR_END_I:.*]] ]
+; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, %[[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], %[[LOR_END_I]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], %[[FOR_COND1_PREHEADER_I]] ], [ 0, %[[LOR_END_I]] ]
; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[OR_COND_I:%.*]] = and i1 [[TMP1]], [[TOBOOL_I]]
-; CHECK-NEXT: br i1 [[OR_COND_I]], label [[LOR_END_I]], label [[LOR_RHS_I:%.*]]
-; CHECK: lor.rhs.i:
+; CHECK-NEXT: br i1 [[OR_COND_I]], label %[[LOR_END_I]], label %[[LOR_RHS_I:.*]]
+; CHECK: [[LOR_RHS_I]]:
; CHECK-NEXT: [[LNOT_I:%.*]] = xor i1 [[TOBOOL_I]], true
; CHECK-NEXT: [[LNOT_EXT_I:%.*]] = zext i1 [[LNOT_I]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[TMP3]], [[LNOT_EXT_I]]
-; CHECK-NEXT: store i32 [[XOR_I]], ptr @e, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: br label [[LOR_END_I]]
-; CHECK: lor.end.i:
+; CHECK-NEXT: store i32 [[XOR_I]], ptr @e, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: br label %[[LOR_END_I]]
+; CHECK: [[LOR_END_I]]:
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[INC12_I]], 1
; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i32 [[INC_I]], 2
-; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[FOR_INC7_I]], label [[FOR_BODY3_I]]
-; CHECK: for.inc7.i:
+; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_INC7_I]], label %[[FOR_BODY3_I]]
+; CHECK: [[FOR_INC7_I]]:
; CHECK-NEXT: [[INC8_I]] = add nsw i32 [[INC816_I]], 1
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[INC816_I]], 0
-; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_PREHEADER_I]], label [[FOR_COND_FOR_END9_CRIT_EDGE_I:%.*]]
-; CHECK: for.cond.for.end9_crit_edge.i:
-; CHECK-NEXT: store i32 0, ptr @g, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 2, ptr @h, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[INC8_I]], ptr @c, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: br label [[FOR_END9_I]]
-; CHECK: for.end9.i:
-; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, [[FOR_COND_FOR_END9_CRIT_EDGE_I]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa [[TBAA7:![0-9]+]]
-; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_COND1_PREHEADER_I]], label %[[FOR_COND_FOR_END9_CRIT_EDGE_I:.*]]
+; CHECK: [[FOR_COND_FOR_END9_CRIT_EDGE_I]]:
+; CHECK-NEXT: store i32 0, ptr @g, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: store i32 2, ptr @h, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: store i32 [[INC8_I]], ptr @c, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: br label %[[FOR_END9_I]]
+; CHECK: [[FOR_END9_I]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], %[[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, %[[FOR_COND_FOR_END9_CRIT_EDGE_I]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa [[ANYPTR_TBAA7:![0-9]+]]
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[CMP10_I:%.*]] = icmp slt i32 [[TMP6]], -1
-; CHECK-NEXT: br i1 [[CMP10_I]], label [[IF_THEN_I:%.*]], label [[FN1_EXIT:%.*]]
-; CHECK: if.then.i:
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: br label [[FN1_EXIT]]
-; CHECK: fn1.exit:
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: br i1 [[CMP10_I]], label %[[IF_THEN_I:.*]], label %[[FN1_EXIT:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT: br label %[[FN1_EXIT]]
+; CHECK: [[FN1_EXIT]]:
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[PUTS2:%.*]] = tail call i32 @puts(ptr @str.2)
; CHECK-NEXT: tail call void @abort() #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
-; CHECK: if.end:
+; CHECK: [[IF_END]]:
; CHECK-NEXT: [[PUTS:%.*]] = tail call i32 @puts(ptr @str)
; CHECK-NEXT: ret i32 0
;
@@ -183,3 +184,11 @@ attributes #3 = { noreturn nounwind optsize }
!6 = !{!"Simple C/C++ TBAA"}
!7 = !{!8, !8, i64 0}
!8 = !{!"any pointer", !5, i64 0}
+;.
+; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"any pointer", [[META5]], i64 0}
+;.
diff --git a/llvm/test/Transforms/NewGVN/pr33367.ll b/llvm/test/Transforms/NewGVN/pr33367.ll
index 597caa2..428a053 100644
--- a/llvm/test/Transforms/NewGVN/pr33367.ll
+++ b/llvm/test/Transforms/NewGVN/pr33367.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -aa-pipeline=basic-aa -passes=newgvn -S %s | FileCheck %s
; Verify that we don't accidentally delete intrinsics that aren't SSA copies
%DS_struct = type { [32 x ptr], i8, [32 x i16] }
@@ -7,47 +7,48 @@
declare i64 @llvm.x86.bmi.bextr.64(i64, i64) #3
define %MNR_struct @f000316011717_2(ptr %pDS, ptr %pCG) #2 {
-; CHECK-LABEL: @f000316011717_2(
-; CHECK-NEXT: Entry:
+; CHECK-LABEL: define %MNR_struct @f000316011717_2(
+; CHECK-SAME: ptr [[PDS:%.*]], ptr [[PCG:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[RESTART:%.*]] = alloca [[MNR_STRUCT:%.*]], align 8
-; CHECK-NEXT: [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], ptr [[PDS:%.*]], i32 0, i32 1
-; CHECK-NEXT: [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa [[TBAA14:![0-9]+]]
+; CHECK-NEXT: [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], ptr [[PDS]], i32 0, i32 1
+; CHECK-NEXT: [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa [[BREG_TBAA14:![0-9]+]]
; CHECK-NEXT: [[ABSADDR:%.*]] = getelementptr i64, ptr [[BASE]], i64 9
-; CHECK-NEXT: [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa [[MEM_TBAA4:![0-9]+]]
; CHECK-NEXT: [[TEMPLATE:%.*]] = icmp eq i64 [[EXTARGET]], 8593987412
-; CHECK-NEXT: br i1 [[TEMPLATE]], label %"BB3.000316011731#1", label [[BB2_000316011731_5:%.*]]
+; CHECK-NEXT: br i1 [[TEMPLATE]], label %"BB3.000316011731#1", label %[[BB2_000316011731_5:.*]]
; CHECK: "BB3.000316011731#1":
; CHECK-NEXT: [[PBASE8:%.*]] = getelementptr [32 x ptr], ptr [[PDS]], i64 0, i64 29
-; CHECK-NEXT: [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]]
+; CHECK-NEXT: [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[BREG_TBAA14]]
; CHECK-NEXT: [[ABSADDR1:%.*]] = getelementptr i64, ptr [[BASE9]], i64 7
-; CHECK-NEXT: [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa [[MEM_TBAA4]]
; CHECK-NEXT: [[PWT:%.*]] = getelementptr [[DS_STRUCT]], ptr [[PDS]], i32 0, i32 2
; CHECK-NEXT: [[PWTE:%.*]] = getelementptr [32 x i16], ptr [[PWT]], i64 0, i64 8593987412
-; CHECK-NEXT: [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa [[TBAA18:![0-9]+]], !invariant.load [[META20:![0-9]+]]
+; CHECK-NEXT: [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa [[CONST_TBAA18:![0-9]+]], !invariant.load [[META20:![0-9]+]]
; CHECK-NEXT: [[SLOWJ:%.*]] = icmp eq i16 [[SHIFTS]], 0
-; CHECK-NEXT: br i1 [[SLOWJ]], label [[BB2_000316011731_5]], label %"BB3.000316011731#1.1"
-; CHECK: BB2.000316011731.5:
+; CHECK-NEXT: br i1 [[SLOWJ]], label %[[BB2_000316011731_5]], label %"BB3.000316011731#1.1"
+; CHECK: [[BB2_000316011731_5]]:
; CHECK-NEXT: [[EXTARGET1:%.*]] = and i64 [[EXTARGET]], 137438953471
-; CHECK-NEXT: switch i64 [[EXTARGET1]], label [[EXIT:%.*]] [
+; CHECK-NEXT: switch i64 [[EXTARGET1]], label %[[EXIT:.*]] [
; CHECK-NEXT: ]
; CHECK: "BB3.000316011731#1.1":
; CHECK-NEXT: [[SHIFTS1:%.*]] = zext i16 [[SHIFTS]] to i64
; CHECK-NEXT: [[VAL:%.*]] = call i64 @llvm.x86.bmi.bextr.64(i64 [[RMEM]], i64 [[SHIFTS1]])
-; CHECK-NEXT: [[PREG:%.*]] = getelementptr [64 x i64], ptr [[PCG:%.*]], i64 0, i64 12
-; CHECK-NEXT: store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: [[PREG:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 12
+; CHECK-NEXT: store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa [[A0_TBAA10:![0-9]+]]
; CHECK-NEXT: [[PREG2:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 14
-; CHECK-NEXT: [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa [[TBAA12:![0-9]+]]
-; CHECK-NEXT: [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]]
+; CHECK-NEXT: [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa [[A2_TBAA12:![0-9]+]]
+; CHECK-NEXT: [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[BREG_TBAA14]]
; CHECK-NEXT: [[ABSADDR2:%.*]] = getelementptr i64, ptr [[BASE2]], i64 [[REG]]
-; CHECK-NEXT: [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa [[MEM_TBAA4]]
; CHECK-NEXT: [[PREG7:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 9
-; CHECK-NEXT: store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa [[X9_TBAA8:![0-9]+]]
; CHECK-NEXT: [[ADD2C279:%.*]] = add i64 [[RMEM2]], [[VAL]]
; CHECK-NEXT: [[CCHK:%.*]] = icmp sge i64 [[ADD2C279]], 0
; CHECK-NEXT: [[CFL:%.*]] = zext i1 [[CCHK]] to i8
-; CHECK-NEXT: store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa [[TBAA16:![0-9]+]]
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: Exit:
+; CHECK-NEXT: store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa [[CARRY_TBAA16:![0-9]+]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RESTART378:%.*]] = load [[MNR_STRUCT]], ptr [[RESTART]], align 8
; CHECK-NEXT: ret [[MNR_STRUCT]] [[RESTART378]]
;
@@ -129,3 +130,24 @@ attributes #3 = { nounwind readnone }
!175 = !{!176, !176, i64 0, i32 1}
!176 = !{!"const", !3}
!181 = !{}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"tbaa2200"}
+; CHECK: [[META2:![0-9]+]] = !{!"data", [[META0]]}
+; CHECK: [[META3:![0-9]+]] = !{!"ctrl", [[META0]]}
+; CHECK: [[MEM_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"mem", [[META2]]}
+; CHECK: [[META7:![0-9]+]] = !{!"grs", [[META2]]}
+; CHECK: [[X9_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK: [[META9]] = !{!"X9", [[META7]]}
+; CHECK: [[A0_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+; CHECK: [[META11]] = !{!"A0", [[META7]]}
+; CHECK: [[A2_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; CHECK: [[META13]] = !{!"A2", [[META7]]}
+; CHECK: [[BREG_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
+; CHECK: [[META15]] = !{!"breg", [[META3]]}
+; CHECK: [[CARRY_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
+; CHECK: [[META17]] = !{!"carry", [[META3]]}
+; CHECK: [[CONST_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0, i32 1}
+; CHECK: [[META19]] = !{!"const", [[META3]]}
+; CHECK: [[META20]] = !{}
+;.
diff --git a/llvm/test/Transforms/NewGVN/pr34452.ll b/llvm/test/Transforms/NewGVN/pr34452.ll
index 9e65349..48bdd88 100644
--- a/llvm/test/Transforms/NewGVN/pr34452.ll
+++ b/llvm/test/Transforms/NewGVN/pr34452.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=newgvn -S < %s | FileCheck %s
;; Ensure we don't crash when simplifying aggregate value expressions
source_filename = "bugpoint-output-09f7a24.bc"
@@ -7,17 +7,18 @@ source_filename = "bugpoint-output-09f7a24.bc"
; Function Attrs: nounwind uwtable
define void @sgrep() local_unnamed_addr #0 {
-; CHECK-LABEL: @sgrep(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-LABEL: define void @sgrep(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa [[INT_TBAA1:![0-9]+]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2048, i32 2047
-; CHECK-NEXT: br label [[WHILE_BODY_US:%.*]]
-; CHECK: while.body.us:
-; CHECK-NEXT: [[START_1230_US:%.*]] = phi i32 [ [[DOT]], [[ENTRY:%.*]] ], [ 0, [[WHILE_BODY_US]] ]
+; CHECK-NEXT: br label %[[WHILE_BODY_US:.*]]
+; CHECK: [[WHILE_BODY_US]]:
+; CHECK-NEXT: [[START_1230_US:%.*]] = phi i32 [ [[DOT]], %[[ENTRY]] ], [ 0, %[[WHILE_BODY_US]] ]
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[START_1230_US]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 [[TMP1]])
-; CHECK-NEXT: br label [[WHILE_BODY_US]]
+; CHECK-NEXT: br label %[[WHILE_BODY_US]]
;
entry:
%0 = load i32, ptr @WHOLELINE, align 4, !tbaa !1
@@ -47,3 +48,9 @@ attributes #1 = { nounwind readnone speculatable }
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
index a63ca13..c1e52b8 100644
--- a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
+++ b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=newgvn %s -S | FileCheck %s
declare void @use(i32)
@@ -7,25 +7,26 @@ declare void @use(i32)
; PredicateInfo are replaced.
define i32 @test(ptr %p1, ptr %p2, i1 %c) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[P1:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[P1]], align 8, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[LV]], 1
-; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[IF_FALSE:%.*]]
-; CHECK: if.false:
-; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT]], label [[FOR_CHECK:%.*]]
-; CHECK: for.check:
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[EXIT:.*]], label %[[IF_FALSE:.*]]
+; CHECK: [[IF_FALSE]]:
+; CHECK-NEXT: br i1 [[C]], label %[[EXIT]], label %[[FOR_CHECK:.*]]
+; CHECK: [[FOR_CHECK]]:
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[LV]], 0
-; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_PH:%.*]], label [[EXIT]]
-; CHECK: for.ph:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[CMP_2]], label %[[FOR_PH:.*]], label %[[EXIT]]
+; CHECK: [[FOR_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[FOR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: call void @use(i32 [[IV]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ne i32 [[IV_NEXT]], [[LV]]
-; CHECK-NEXT: br i1 [[CMP_3]], label [[FOR_BODY]], label [[EXIT]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[CMP_3]], label %[[FOR_BODY]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[LV]]
;
entry:
@@ -59,3 +60,10 @@ exit: ; preds = %for.body, %for.check
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; CHECK: [[META1]] = !{!"FULL", [[META2]], i64 0, [[META2]], i64 4, [[META3:![0-9]+]], i64 8}
+; CHECK: [[META2]] = !{!"int", [[META3]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/NewGVN/tbaa.ll b/llvm/test/Transforms/NewGVN/tbaa.ll
index 20c09aa..a906603 100644
--- a/llvm/test/Transforms/NewGVN/tbaa.ll
+++ b/llvm/test/Transforms/NewGVN/tbaa.ll
@@ -1,10 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=newgvn -S < %s | FileCheck %s
define i32 @test1(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test1(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -17,7 +17,7 @@ define i32 @test1(ptr %p, ptr %q) {
define i32 @test2(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test2(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -30,7 +30,7 @@ define i32 @test2(ptr %p, ptr %q) {
define i32 @test3(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test3(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -43,7 +43,7 @@ define i32 @test3(ptr %p, ptr %q) {
define i32 @test4(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test4(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -56,7 +56,7 @@ define i32 @test4(ptr %p, ptr %q) {
define i32 @test5(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test5(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -69,7 +69,7 @@ define i32 @test5(ptr %p, ptr %q) {
define i32 @test6(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test6(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -82,7 +82,7 @@ define i32 @test6(ptr %p, ptr %q) {
define i32 @test7(ptr %p, ptr %q) {
; CHECK-LABEL: define i32 @test7(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -129,7 +129,7 @@ define i32 @test10(ptr %p, ptr %q) {
; and not just the common final access type.
; CHECK-LABEL: define i32 @test10(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA10:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -165,17 +165,17 @@ declare i32 @foo(ptr) readonly
!9 = !{!"yet another root"}
!10 = !{!"node", !9, i64 1}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"C", [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"A", [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"tbaa root"}
-; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
; CHECK: [[META5]] = !{!"B", [[META2]]}
-; CHECK: [[TBAA6]] = !{[[META2]], [[META2]], i64 0}
-; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0}
+; CHECK: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
; CHECK: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]}
; CHECK: [[META9]] = !{!"another root"}
-; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0}
+; CHECK: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0}
; CHECK: [[META11]] = !{!"struct X", [[META12]], i64 0}
; CHECK: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0}
; CHECK: [[META13]] = !{!"char", [[META3]], i64 0}
diff --git a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
index d8b28d7..68f7ee5 100644
--- a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
+++ b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=newgvn -S < %s | FileCheck %s
%struct.t = type { ptr }
@@ -8,10 +8,10 @@
define void @test1(ptr nocapture readonly %p, i32 %v) #0 {
; CHECK-LABEL: define void @test1(
; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5:![0-9]+]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]]
; CHECK-NEXT: ret void
;
entry:
@@ -27,11 +27,11 @@ entry:
define void @test2(ptr nocapture readonly %p, i32 %v) #0 {
; CHECK-LABEL: define void @test2(
; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[ANYPTR_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -47,11 +47,11 @@ entry:
define void @test3(ptr nocapture readonly %p, i32 %v) #0 {
; CHECK-LABEL: define void @test3(
; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]]
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]]
; CHECK-NEXT: ret void
;
entry:
@@ -73,11 +73,11 @@ attributes #0 = { norecurse nounwind }
!7 = !{!"int", !4, i64 0}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
; CHECK: [[META1]] = !{!"", [[META2]], i64 0}
; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
-; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
;.
diff --git a/llvm/test/Transforms/OpenMP/dead_use.ll b/llvm/test/Transforms/OpenMP/dead_use.ll
index b3f5194..1c4b2c6 100644
--- a/llvm/test/Transforms/OpenMP/dead_use.ll
+++ b/llvm/test/Transforms/OpenMP/dead_use.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
@@ -7,8 +7,8 @@
; Function Attrs: nounwind uwtable
define dso_local i32 @b() #0 {
-; CHECK-LABEL: define {{[^@]+}}@b
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-LABEL: define dso_local i32 @b(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
@@ -22,8 +22,8 @@ define dso_local i32 @b() #0 {
; Function Attrs: nounwind uwtable
define internal i32 @a() #0 {
-; CHECK-LABEL: define {{[^@]+}}@a
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-LABEL: define internal i32 @a(
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @b()
; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.)
@@ -39,12 +39,12 @@ define internal i32 @a() #0 {
; Function Attrs: norecurse nounwind uwtable
define internal void @.omp_outlined.(ptr noalias %0, ptr noalias %1) #1 {
-; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-LABEL: define internal void @.omp_outlined.(
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]]
-; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[TBAA2]]
+; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA2:![0-9]+]]
+; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA2]]
; CHECK-NEXT: ret void
;
%3 = alloca ptr, align 8
@@ -72,3 +72,9 @@ attributes #2 = { nounwind }
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7}
!7 = !{i64 2, i64 -1, i64 -1, i1 true}
+;.
+; CHECK: [[ANYPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+; CHECK: [[META3]] = !{!"any pointer", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll
index 1d18e52..ad3955e 100644
--- a/llvm/test/Transforms/OpenMP/global_constructor.ll
+++ b/llvm/test/Transforms/OpenMP/global_constructor.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 6
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
@@ -74,34 +74,40 @@ attributes #1 = { convergent nounwind }
!12 = !{!"double", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C++ TBAA"}
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
-; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_85283c04_main_l11(
+; CHECK-SAME: ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr [[DYN]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; CHECK: common.ret:
+; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; CHECK: [[COMMON_RET]]:
; CHECK-NEXT: ret void
-; CHECK: user_code.entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA9:![0-9]+]]
+; CHECK: [[USER_CODE_ENTRY]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
-; CHECK: region.guarded:
-; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA9]]
-; CHECK-NEXT: br label [[REGION_BARRIER]]
-; CHECK: region.barrier:
+; CHECK-NEXT: br i1 [[TMP3]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]]
+; CHECK: [[REGION_GUARDED]]:
+; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[DOUBLE_TBAA9]]
+; CHECK-NEXT: br label %[[REGION_BARRIER]]
+; CHECK: [[REGION_BARRIER]]:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR1]]
-; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK-NEXT: br label %[[COMMON_RET]]
;
;
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading__fd02_85283c04_Device_l6_ctor(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
-; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA9]]
+; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[DOUBLE_TBAA9]]
; CHECK-NEXT: ret void
;
+;.
+; CHECK: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; CHECK: [[META10]] = !{!"double", [[META11:![0-9]+]], i64 0}
+; CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+; CHECK: [[META12]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 0272c41..19d4474 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
@@ -186,33 +186,33 @@
; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
;.
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -221,47 +221,47 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5()
}
define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; AMDGPU-SAME: ) #[[ATTR1:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; NVPTX-SAME: ) #[[ATTR1:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR1:[0-9]+]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -269,71 +269,71 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR1:[0-9]+]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR1:[0-9]+]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -341,66 +341,66 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR1:[0-9]+]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -424,125 +424,125 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
+; AMDGPU: [[FOR_BODY]]:
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
+; NVPTX: [[FOR_BODY]]:
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND]]:
+; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
+; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
+; AMDGPU-DISABLED1: [[FOR_BODY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND]]:
+; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
+; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
+; AMDGPU-DISABLED2: [[FOR_BODY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED1: [[FOR_COND]]:
+; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
+; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
+; NVPTX-DISABLED1: [[FOR_BODY]]:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED2: [[FOR_COND]]:
+; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
+; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
+; NVPTX-DISABLED2: [[FOR_BODY]]:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -566,39 +566,39 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__1(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__1(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__1(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__1(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__1(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__1(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -609,9 +609,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__1_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -622,9 +622,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__1_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -635,9 +635,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__1_wrapper(
+; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -648,9 +648,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__1_wrapper(
+; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -661,9 +661,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__1_wrapper(
+; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -674,9 +674,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__1_wrapper(
+; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -703,47 +703,47 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-SAME: () #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; AMDGPU-SAME: ) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-SAME: () #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; NVPTX-SAME: ) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -751,71 +751,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -823,66 +823,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -906,140 +906,140 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__2(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
+; AMDGPU: [[FOR_BODY]]:
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__2(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
+; NVPTX: [[FOR_BODY]]:
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__2(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND]]:
+; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
+; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
+; AMDGPU-DISABLED1: [[FOR_BODY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__2(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND]]:
+; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
+; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
+; AMDGPU-DISABLED2: [[FOR_BODY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__2(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED1: [[FOR_COND]]:
+; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
+; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
+; NVPTX-DISABLED1: [[FOR_BODY]]:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__2(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED2: [[FOR_COND]]:
+; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
+; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
+; NVPTX-DISABLED2: [[FOR_BODY]]:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -1066,39 +1066,39 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__3(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__3(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__3(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__3(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__3(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__3(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -1109,9 +1109,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__3_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1122,9 +1122,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__3_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1135,9 +1135,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__3_wrapper(
+; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1148,9 +1148,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__3_wrapper(
+; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1161,9 +1161,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__3_wrapper(
+; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1174,9 +1174,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__3_wrapper(
+; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1203,47 +1203,47 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-SAME: () #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; AMDGPU-SAME: ) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-SAME: () #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; NVPTX-SAME: ) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -1251,71 +1251,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -1323,66 +1323,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -1406,131 +1406,131 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__4(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__4(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__4(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND]]:
+; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
+; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED1: [[FOR_BODY]]:
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__4(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND]]:
+; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
+; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED2: [[FOR_BODY]]:
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__4(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED1: [[FOR_COND]]:
+; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
+; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED1: [[FOR_BODY]]:
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__4(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED2: [[FOR_COND]]:
+; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
+; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED2: [[FOR_BODY]]:
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -1557,57 +1557,57 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-LABEL: define internal void @__omp_outlined__5(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-LABEL: define internal void @__omp_outlined__5(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__5(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__5(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__5(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__5(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -1621,9 +1621,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__5_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1632,13 +1632,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__5_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1647,13 +1647,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__5_wrapper(
+; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1662,13 +1662,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__5_wrapper(
+; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1677,13 +1677,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__5_wrapper(
+; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1692,13 +1692,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__5_wrapper(
+; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1707,7 +1707,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -1729,47 +1729,47 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-SAME: () #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; AMDGPU-SAME: ) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-SAME: () #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; NVPTX-SAME: ) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -1777,71 +1777,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -1849,66 +1849,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -1932,163 +1932,163 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__6(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
-; AMDGPU: region.check.tid:
+; AMDGPU-NEXT: br label %[[REGION_CHECK_TID:.*]]
+; AMDGPU: [[REGION_CHECK_TID]]:
; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
-; AMDGPU: region.guarded:
-; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
-; AMDGPU: region.guarded.end:
-; AMDGPU-NEXT: br label [[REGION_BARRIER]]
-; AMDGPU: region.barrier:
+; AMDGPU-NEXT: br i1 [[TMP1]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]]
+; AMDGPU: [[REGION_GUARDED]]:
+; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; AMDGPU-NEXT: br label %[[REGION_GUARDED_END:.*]]
+; AMDGPU: [[REGION_GUARDED_END]]:
+; AMDGPU-NEXT: br label %[[REGION_BARRIER]]
+; AMDGPU: [[REGION_BARRIER]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]]
-; AMDGPU: region.exit:
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[REGION_EXIT:.*]]
+; AMDGPU: [[REGION_EXIT]]:
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[REGION_EXIT]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__6(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
-; NVPTX: region.check.tid:
+; NVPTX-NEXT: br label %[[REGION_CHECK_TID:.*]]
+; NVPTX: [[REGION_CHECK_TID]]:
; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
-; NVPTX: region.guarded:
-; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
-; NVPTX: region.guarded.end:
-; NVPTX-NEXT: br label [[REGION_BARRIER]]
-; NVPTX: region.barrier:
+; NVPTX-NEXT: br i1 [[TMP1]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]]
+; NVPTX: [[REGION_GUARDED]]:
+; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; NVPTX-NEXT: br label %[[REGION_GUARDED_END:.*]]
+; NVPTX: [[REGION_GUARDED_END]]:
+; NVPTX-NEXT: br label %[[REGION_BARRIER]]
+; NVPTX: [[REGION_BARRIER]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX-NEXT: br label [[REGION_EXIT:%.*]]
-; NVPTX: region.exit:
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[REGION_EXIT:.*]]
+; NVPTX: [[REGION_EXIT]]:
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[REGION_EXIT]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__6(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND]]:
+; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
+; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1: [[FOR_BODY]]:
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__6(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND]]:
+; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
+; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2: [[FOR_BODY]]:
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__6(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED1: [[FOR_COND]]:
+; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
+; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1: [[FOR_BODY]]:
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__6(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX-DISABLED2: [[FOR_COND]]:
+; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
+; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2: [[FOR_BODY]]:
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -2116,57 +2116,57 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-LABEL: define internal void @__omp_outlined__7(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-LABEL: define internal void @__omp_outlined__7(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__7(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__7(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__7(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__7(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2180,9 +2180,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__7_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2191,13 +2191,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__7_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2206,13 +2206,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__7_wrapper(
+; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2221,13 +2221,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__7_wrapper(
+; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2236,13 +2236,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__7_wrapper(
+; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2251,13 +2251,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__7_wrapper(
+; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2266,7 +2266,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2288,9 +2288,9 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-SAME: () #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; AMDGPU-SAME: ) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -2298,47 +2298,47 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU: is_worker_check:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU: [[IS_WORKER_CHECK]]:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU: worker_state_machine.begin:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU: worker_state_machine.finished:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: worker_state_machine.is_active.check:
-; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.end:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU: worker_state_machine.done.barrier:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU: thread.user_code.check:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-SAME: () #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; NVPTX-SAME: ) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -2346,46 +2346,46 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX: is_worker_check:
+; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX: [[IS_WORKER_CHECK]]:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX: worker_state_machine.begin:
+; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX: worker_state_machine.finished:
+; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-NEXT: ret void
-; NVPTX: worker_state_machine.is_active.check:
-; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX: worker_state_machine.parallel_region.end:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX: worker_state_machine.done.barrier:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX: thread.user_code.check:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -2393,65 +2393,65 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -2459,60 +2459,60 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -2536,39 +2536,39 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__8(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__8(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__8(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__8(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__8(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__8(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2579,255 +2579,255 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-SAME: () #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; AMDGPU-SAME: ) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU: is_worker_check:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU: [[IS_WORKER_CHECK]]:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU: worker_state_machine.begin:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU: worker_state_machine.finished:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: worker_state_machine.is_active.check:
-; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.check:
+; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.execute:
+; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU: worker_state_machine.parallel_region.end:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU: worker_state_machine.done.barrier:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU: thread.user_code.check:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-SAME: () #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; NVPTX-SAME: ) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX: is_worker_check:
+; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX: [[IS_WORKER_CHECK]]:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX: worker_state_machine.begin:
+; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX: worker_state_machine.finished:
+; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-NEXT: ret void
-; NVPTX: worker_state_machine.is_active.check:
-; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX: worker_state_machine.parallel_region.check:
+; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX: worker_state_machine.parallel_region.execute:
+; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX: worker_state_machine.parallel_region.end:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX: worker_state_machine.done.barrier:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX: thread.user_code.check:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
+; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
+; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
+; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
+; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED1: [[COMMON_RET]]:
; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
+; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
+; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU-DISABLED2: [[COMMON_RET]]:
; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
+; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
+; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
+; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
+; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
+; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
+; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED1: [[COMMON_RET]]:
; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
+; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74(
+; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
+; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX-DISABLED2: [[COMMON_RET]]:
; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
+; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
+; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -2850,39 +2850,39 @@ user_code.entry: ; preds = %entry
; Function Attrs: alwaysinline convergent nounwind
define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #2 {
-; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @.omp_outlined.(
+; AMDGPU-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @.omp_outlined.(
+; NVPTX-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @.omp_outlined.(
+; AMDGPU-DISABLED1-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @.omp_outlined.(
+; AMDGPU-DISABLED2-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @.omp_outlined.(
+; NVPTX-DISABLED1-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @.omp_outlined.(
+; NVPTX-DISABLED2-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2925,28 +2925,28 @@ declare void @unknowni32p(ptr) #7
declare void @llvm.lifetime.start.p0(ptr captures(none)) #8
define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
-; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; AMDGPU-LABEL: define weak i32 @__kmpc_target_init(
+; AMDGPU-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-NEXT: ret i32 0
;
-; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; NVPTX-LABEL: define weak i32 @__kmpc_target_init(
+; NVPTX-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-NEXT: ret i32 0
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; AMDGPU-DISABLED1-LABEL: define weak i32 @__kmpc_target_init(
+; AMDGPU-DISABLED1-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-DISABLED1-NEXT: ret i32 0
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; AMDGPU-DISABLED2-LABEL: define weak i32 @__kmpc_target_init(
+; AMDGPU-DISABLED2-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-DISABLED2-NEXT: ret i32 0
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; NVPTX-DISABLED1-LABEL: define weak i32 @__kmpc_target_init(
+; NVPTX-DISABLED1-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-DISABLED1-NEXT: ret i32 0
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; NVPTX-DISABLED2-LABEL: define weak i32 @__kmpc_target_init(
+; NVPTX-DISABLED2-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-DISABLED2-NEXT: ret i32 0
;
ret i32 0
@@ -2969,39 +2969,39 @@ declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__9(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__9(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__9(
+; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__9(
+; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__9(
+; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__9(
+; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -3012,9 +3012,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__9_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3025,9 +3025,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__9_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3038,9 +3038,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
+; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__9_wrapper(
+; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3051,9 +3051,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
+; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__9_wrapper(
+; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]]
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3064,9 +3064,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
+; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__9_wrapper(
+; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3077,9 +3077,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
+; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__9_wrapper(
+; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]]
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -3237,7 +3237,7 @@ attributes #9 = { alwaysinline }
; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3245,7 +3245,7 @@ attributes #9 = { alwaysinline }
; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -3262,7 +3262,7 @@ attributes #9 = { alwaysinline }
; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3270,7 +3270,7 @@ attributes #9 = { alwaysinline }
; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -3287,7 +3287,7 @@ attributes #9 = { alwaysinline }
; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED1: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3295,7 +3295,7 @@ attributes #9 = { alwaysinline }
; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED1: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -3312,7 +3312,7 @@ attributes #9 = { alwaysinline }
; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED2: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3320,7 +3320,7 @@ attributes #9 = { alwaysinline }
; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED2: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -3337,7 +3337,7 @@ attributes #9 = { alwaysinline }
; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED1: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3345,7 +3345,7 @@ attributes #9 = { alwaysinline }
; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED1: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -3362,7 +3362,7 @@ attributes #9 = { alwaysinline }
; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED2: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -3370,7 +3370,7 @@ attributes #9 = { alwaysinline }
; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED2: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index 59e2499..60d42ed 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; void foo(double x) {
@@ -29,35 +29,35 @@ target triple = "nvptx64"
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
-; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5(
+; CHECK-SAME: ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment, ptr [[DYN]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; CHECK: common.ret:
+; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; CHECK: [[COMMON_RET]]:
; CHECK-NEXT: ret void
-; CHECK: user_code.entry:
+; CHECK: [[USER_CODE_ENTRY]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR3]]
; CHECK-NEXT: [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]]
-; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
-; CHECK: region.check.tid:
+; CHECK-NEXT: br label %[[REGION_CHECK_TID:.*]]
+; CHECK: [[REGION_CHECK_TID]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
-; CHECK: region.guarded:
-; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA7:![0-9]+]]
-; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
-; CHECK: region.guarded.end:
-; CHECK-NEXT: br label [[REGION_BARRIER]]
-; CHECK: region.barrier:
+; CHECK-NEXT: br i1 [[TMP3]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]]
+; CHECK: [[REGION_GUARDED]]:
+; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[DOUBLE_TBAA7:![0-9]+]]
+; CHECK-NEXT: br label %[[REGION_GUARDED_END:.*]]
+; CHECK: [[REGION_GUARDED_END]]:
+; CHECK-NEXT: br label %[[REGION_BARRIER]]
+; CHECK: [[REGION_BARRIER]]:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
-; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
-; CHECK: region.exit:
+; CHECK-NEXT: br label %[[REGION_EXIT:.*]]
+; CHECK: [[REGION_EXIT]]:
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR3]]
-; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK-NEXT: br label %[[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
@@ -81,9 +81,9 @@ declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (ptr noalias captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias captures(none) [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define internal void @__omp_outlined__(
+; CHECK-SAME: ptr noalias captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias captures(none) [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret void
;
entry:
@@ -92,9 +92,9 @@ entry:
; Function Attrs: norecurse nounwind
define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define internal void @__omp_outlined___wrapper(
+; CHECK-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
; CHECK-NEXT: ret void
@@ -158,7 +158,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[DOUBLE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
; CHECK: [[META8]] = !{!"double", [[META9:![0-9]+]], i64 0}
; CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
; CHECK: [[META10]] = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index d1e006a..dec6a68 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
@@ -30,13 +30,13 @@
; NVPTX: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
define weak ptx_kernel void @spmd_callees(i1 %c) #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees
-; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; AMDGPU-LABEL: define weak ptx_kernel void @spmd_callees(
+; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @spmd_callees__debug(i1 [[C]])
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@spmd_callees
-; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; NVPTX-LABEL: define weak ptx_kernel void @spmd_callees(
+; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT: call void @spmd_callees__debug(i1 [[C]])
; NVPTX-NEXT: ret void
;
@@ -45,71 +45,71 @@ define weak ptx_kernel void @spmd_callees(i1 %c) #0 {
}
define internal void @spmd_callees__debug(i1 %c) {
-; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees__debug
-; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @spmd_callees__debug(
+; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
+; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; AMDGPU: [[BB3]]:
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; AMDGPU: 5:
+; AMDGPU-NEXT: br label %[[BB7:.*]]
+; AMDGPU: [[BB4]]:
+; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; AMDGPU: [[BB5]]:
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7]]
-; AMDGPU: 6:
+; AMDGPU-NEXT: br label %[[BB7]]
+; AMDGPU: [[BB6]]:
; AMDGPU-NEXT: unreachable
-; AMDGPU: 7:
+; AMDGPU: [[BB7]]:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@spmd_callees__debug
-; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @spmd_callees__debug(
+; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
+; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; NVPTX: [[BB3]]:
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; NVPTX: 5:
+; NVPTX-NEXT: br label %[[BB7:.*]]
+; NVPTX: [[BB4]]:
+; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; NVPTX: [[BB5]]:
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7]]
-; NVPTX: 6:
+; NVPTX-NEXT: br label %[[BB7]]
+; NVPTX: [[BB6]]:
; NVPTX-NEXT: unreachable
-; NVPTX: 7:
+; NVPTX: [[BB7]]:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -134,43 +134,43 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable1(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable1(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -194,15 +194,15 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__1(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__1(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
;
@@ -213,9 +213,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__1_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -226,9 +226,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__1_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -254,48 +254,48 @@ entry:
}
define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable2(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR6]]
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable2(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR6]]
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -322,15 +322,15 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__3(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__3(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
@@ -341,9 +341,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__3_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -354,9 +354,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__3_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -383,9 +383,9 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee
-; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callee(
+; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -393,62 +393,62 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU: is_worker_check:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU: [[IS_WORKER_CHECK]]:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU: worker_state_machine.begin:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU: worker_state_machine.finished:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: worker_state_machine.is_active.check:
-; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.end:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU: worker_state_machine.done.barrier:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU: thread.user_code.check:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
+; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; AMDGPU: [[BB3]]:
; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; AMDGPU: 5:
+; AMDGPU-NEXT: br label %[[BB7:.*]]
+; AMDGPU: [[BB4]]:
+; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; AMDGPU: [[BB5]]:
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7]]
-; AMDGPU: 6:
+; AMDGPU-NEXT: br label %[[BB7]]
+; AMDGPU: [[BB6]]:
; AMDGPU-NEXT: unreachable
-; AMDGPU: 7:
+; AMDGPU: [[BB7]]:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee
-; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callee(
+; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -456,57 +456,57 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX: is_worker_check:
+; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX: [[IS_WORKER_CHECK]]:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX: worker_state_machine.begin:
+; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX: worker_state_machine.finished:
+; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-NEXT: ret void
-; NVPTX: worker_state_machine.is_active.check:
-; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX: worker_state_machine.parallel_region.end:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX: worker_state_machine.done.barrier:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX: thread.user_code.check:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
+; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; NVPTX: [[BB3]]:
; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; NVPTX: 5:
+; NVPTX-NEXT: br label %[[BB7:.*]]
+; NVPTX: [[BB4]]:
+; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; NVPTX: [[BB5]]:
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7]]
-; NVPTX: 6:
+; NVPTX-NEXT: br label %[[BB7]]
+; NVPTX: [[BB6]]:
; NVPTX-NEXT: unreachable
-; NVPTX: 7:
+; NVPTX: [[BB7]]:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -531,49 +531,49 @@ user_code.entry: ; preds = %entry
}
define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable3(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable3(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*]]:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]]
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
@@ -600,21 +600,21 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-LABEL: define internal void @__omp_outlined__5(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-LABEL: define internal void @__omp_outlined__5(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
@@ -628,9 +628,9 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__5_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -639,13 +639,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__5_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -654,7 +654,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
@@ -676,45 +676,45 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata
-; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @spmd_callees_metadata(
+; AMDGPU-SAME: ptr [[FP:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@spmd_callees_metadata
-; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @spmd_callees_metadata(
+; NVPTX-SAME: ptr [[FP:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -739,9 +739,9 @@ user_code.entry: ; preds = %entry
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
-; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata
-; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(
+; AMDGPU-SAME: ptr [[FP:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -749,61 +749,61 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU: is_worker_check:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; AMDGPU: [[IS_WORKER_CHECK]]:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU: worker_state_machine.begin:
+; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU: worker_state_machine.finished:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: worker_state_machine.is_active.check:
-; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
+; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU: worker_state_machine.parallel_region.end:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU: worker_state_machine.done.barrier:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU: thread.user_code.check:
+; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; AMDGPU: [[THREAD_USER_CODE_CHECK]]:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU: common.ret:
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; AMDGPU: [[COMMON_RET]]:
; AMDGPU-NEXT: ret void
-; AMDGPU: user_code.entry:
+; AMDGPU: [[USER_CODE_ENTRY]]:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
+; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; AMDGPU: [[BB3]]:
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; AMDGPU: 5:
+; AMDGPU-NEXT: br label %[[BB7:.*]]
+; AMDGPU: [[BB4]]:
+; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; AMDGPU: [[BB5]]:
; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
-; AMDGPU-NEXT: br label [[TMP7]]
-; AMDGPU: 6:
+; AMDGPU-NEXT: br label %[[BB7]]
+; AMDGPU: [[BB6]]:
; AMDGPU-NEXT: unreachable
-; AMDGPU: 7:
+; AMDGPU: [[BB7]]:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-NEXT: br label [[COMMON_RET]]
+; AMDGPU-NEXT: br label %[[COMMON_RET]]
;
-; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata
-; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(
+; NVPTX-SAME: ptr [[FP:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
@@ -811,56 +811,56 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX: is_worker_check:
+; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]]
+; NVPTX: [[IS_WORKER_CHECK]]:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX: worker_state_machine.begin:
+; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX: worker_state_machine.finished:
+; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]:
; NVPTX-NEXT: ret void
-; NVPTX: worker_state_machine.is_active.check:
-; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX: worker_state_machine.parallel_region.fallback.execute:
+; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]:
+; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX: worker_state_machine.parallel_region.end:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]]
+; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX: worker_state_machine.done.barrier:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX: thread.user_code.check:
+; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]]
+; NVPTX: [[THREAD_USER_CODE_CHECK]]:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX: common.ret:
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]]
+; NVPTX: [[COMMON_RET]]:
; NVPTX-NEXT: ret void
-; NVPTX: user_code.entry:
+; NVPTX: [[USER_CODE_ENTRY]]:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
+; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]]
+; NVPTX: [[BB3]]:
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
-; NVPTX: 5:
+; NVPTX-NEXT: br label %[[BB7:.*]]
+; NVPTX: [[BB4]]:
+; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]]
+; NVPTX: [[BB5]]:
; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
-; NVPTX-NEXT: br label [[TMP7]]
-; NVPTX: 6:
+; NVPTX-NEXT: br label %[[BB7]]
+; NVPTX: [[BB6]]:
; NVPTX-NEXT: unreachable
-; NVPTX: 7:
+; NVPTX: [[BB7]]:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-NEXT: br label [[COMMON_RET]]
+; NVPTX-NEXT: br label %[[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
@@ -884,39 +884,39 @@ user_code.entry: ; preds = %entry
}
define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU: for.cond:
-; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; AMDGPU-LABEL: define void @__omp_outlined_spmd_amenable_external(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*]]:
+; AMDGPU-NEXT: br label %[[FOR_COND:.*]]
+; AMDGPU: [[FOR_COND]]:
+; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU: for.cond.cleanup:
+; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; AMDGPU: [[FOR_COND_CLEANUP]]:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
-; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU: [[FOR_BODY]]:
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
-; NVPTX-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX: for.cond:
-; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; NVPTX-LABEL: define void @__omp_outlined_spmd_amenable_external(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*]]:
+; NVPTX-NEXT: br label %[[FOR_COND:.*]]
+; NVPTX: [[FOR_COND]]:
+; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX: for.cond.cleanup:
+; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
+; NVPTX: [[FOR_COND_CLEANUP]]:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
-; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; NVPTX: [[FOR_BODY]]:
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
br label %for.cond
@@ -938,14 +938,14 @@ for.body: ; preds = %for.cond
}
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__7(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__7(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: ret void
;
entry:
@@ -954,14 +954,14 @@ entry:
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined__7_wrapper(
+; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined__7_wrapper(
+; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: ret void
;
entry:
@@ -969,13 +969,13 @@ entry:
}
define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable_external
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-LABEL: define void @__omp_outlined_not_spmd_amenable_external(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTGLOBAL_TID_]], ptr [[DOTBOUND_TID_]])
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable_external
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-LABEL: define void @__omp_outlined_not_spmd_amenable_external(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTGLOBAL_TID_]], ptr [[DOTBOUND_TID_]])
; NVPTX-NEXT: ret void
;
@@ -984,15 +984,15 @@ define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid.
}
define internal void @__omp_outlined_not_spmd_amenable(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-NEXT: entry:
+; AMDGPU-LABEL: define internal void @__omp_outlined_not_spmd_amenable(
+; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; AMDGPU-NEXT: [[ENTRY:.*:]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
-; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-NEXT: entry:
+; NVPTX-LABEL: define internal void @__omp_outlined_not_spmd_amenable(
+; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
+; NVPTX-NEXT: [[ENTRY:.*:]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
@@ -1020,12 +1020,12 @@ declare void @unknowni32p(ptr) #5
declare void @llvm.lifetime.start.p0(ptr captures(none)) #6
define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
-; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; AMDGPU-LABEL: define weak i32 @__kmpc_target_init(
+; AMDGPU-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-NEXT: ret i32 0
;
-; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; NVPTX-LABEL: define weak i32 @__kmpc_target_init(
+; NVPTX-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-NEXT: ret i32 0
;
ret i32 0
@@ -1150,7 +1150,7 @@ attributes #8 = { nounwind }
; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -1158,7 +1158,7 @@ attributes #8 = { nounwind }
; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
@@ -1175,7 +1175,7 @@ attributes #8 = { nounwind }
; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
@@ -1183,7 +1183,7 @@ attributes #8 = { nounwind }
; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
-; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index 1fe3fde..92e625d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -O3 < %s | FileCheck %s
; Check unrolling / SLP vectorization where the order of lanes is important for
@@ -11,9 +11,9 @@ target triple = "aarch64"
; Function Attrs: nounwind uwtable
define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) #0 {
-; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering
-; CHECK-SAME: (ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering(
+; CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[IP1]] to i64
; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[IP2]] to i64
; CHECK-NEXT: [[RRRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
@@ -30,26 +30,26 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
; CHECK-NEXT: [[RDD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 [[IDX_EXT63]]
; CHECK-NEXT: [[RRRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_2]], i64 4
; CHECK-NEXT: [[RRRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_2]], i64 4
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[CHAR_TBAA0]]
+; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32>
-; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -57,14 +57,14 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
; CHECK-NEXT: [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32>
-; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -482,3 +482,8 @@ attributes #2 = { nounwind }
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
!13 = distinct !{!13, !12}
+;.
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
index 0967736..4c7e39d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -O3 < %s | FileCheck %s --check-prefixes=CHECK-O3
; RUN: opt -S -passes="default<O3>,default<O3>" < %s | FileCheck %s --check-prefixes=CHECK-LTO
@@ -11,9 +11,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[ENTRY:.*:]]
; CHECK-O3-NEXT: [[IDX_EXT8:%.*]] = sext i32 [[S_P2]] to i64
; CHECK-O3-NEXT: [[IDX_EXT:%.*]] = sext i32 [[S_P1]] to i64
-; CHECK-O3-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-O3-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-O3-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP4:%.*]] = sub nsw <16 x i16> [[TMP1]], [[TMP3]]
; CHECK-O3-NEXT: [[TMP5:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP4]], i1 false)
@@ -21,9 +21,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
; CHECK-O3-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP12:%.*]] = sub nsw <16 x i16> [[TMP9]], [[TMP11]]
; CHECK-O3-NEXT: [[TMP13:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP12]], i1 false)
@@ -32,9 +32,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_1:%.*]] = add i32 [[TMP15]], [[TMP7]]
; CHECK-O3-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP16:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP16:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP20:%.*]] = sub nsw <16 x i16> [[TMP17]], [[TMP19]]
; CHECK-O3-NEXT: [[TMP21:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP20]], i1 false)
@@ -43,9 +43,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_2:%.*]] = add i32 [[TMP23]], [[OP_RDX_1]]
; CHECK-O3-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_1]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP28:%.*]] = sub nsw <16 x i16> [[TMP25]], [[TMP27]]
; CHECK-O3-NEXT: [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP28]], i1 false)
@@ -54,9 +54,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_3:%.*]] = add i32 [[TMP31]], [[OP_RDX_2]]
; CHECK-O3-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_2]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP34:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP34:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[TMP34]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP36:%.*]] = sub nsw <16 x i16> [[TMP33]], [[TMP35]]
; CHECK-O3-NEXT: [[TMP37:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP36]], i1 false)
@@ -65,9 +65,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_4:%.*]] = add i32 [[TMP39]], [[OP_RDX_3]]
; CHECK-O3-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_3]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP41:%.*]] = zext <16 x i8> [[TMP40]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP42:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP42:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP44:%.*]] = sub nsw <16 x i16> [[TMP41]], [[TMP43]]
; CHECK-O3-NEXT: [[TMP45:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP44]], i1 false)
@@ -76,9 +76,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_5:%.*]] = add i32 [[TMP47]], [[OP_RDX_4]]
; CHECK-O3-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_4]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP48:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP48:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP49:%.*]] = zext <16 x i8> [[TMP48]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP50:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP50:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP51:%.*]] = zext <16 x i8> [[TMP50]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP52:%.*]] = sub nsw <16 x i16> [[TMP49]], [[TMP51]]
; CHECK-O3-NEXT: [[TMP53:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP52]], i1 false)
@@ -87,9 +87,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_6:%.*]] = add i32 [[TMP55]], [[OP_RDX_5]]
; CHECK-O3-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_5]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP56:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP56:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP57:%.*]] = zext <16 x i8> [[TMP56]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP58:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP58:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP59:%.*]] = zext <16 x i8> [[TMP58]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP60:%.*]] = sub nsw <16 x i16> [[TMP57]], [[TMP59]]
; CHECK-O3-NEXT: [[TMP61:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP60]], i1 false)
@@ -98,9 +98,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_7:%.*]] = add i32 [[TMP63]], [[OP_RDX_6]]
; CHECK-O3-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_6]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_6]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP64:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP64:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP65:%.*]] = zext <16 x i8> [[TMP64]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP66:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP66:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP67:%.*]] = zext <16 x i8> [[TMP66]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP68:%.*]] = sub nsw <16 x i16> [[TMP65]], [[TMP67]]
; CHECK-O3-NEXT: [[TMP69:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP68]], i1 false)
@@ -109,9 +109,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_8:%.*]] = add i32 [[TMP71]], [[OP_RDX_7]]
; CHECK-O3-NEXT: [[ADD_PTR_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_7]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_7]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP72:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP72:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP73:%.*]] = zext <16 x i8> [[TMP72]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP74:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP74:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP75:%.*]] = zext <16 x i8> [[TMP74]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP76:%.*]] = sub nsw <16 x i16> [[TMP73]], [[TMP75]]
; CHECK-O3-NEXT: [[TMP77:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP76]], i1 false)
@@ -120,9 +120,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_9:%.*]] = add i32 [[TMP79]], [[OP_RDX_8]]
; CHECK-O3-NEXT: [[ADD_PTR_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_8]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_8]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP80:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP80:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP81:%.*]] = zext <16 x i8> [[TMP80]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP82:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP82:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP83:%.*]] = zext <16 x i8> [[TMP82]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP84:%.*]] = sub nsw <16 x i16> [[TMP81]], [[TMP83]]
; CHECK-O3-NEXT: [[TMP85:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP84]], i1 false)
@@ -131,9 +131,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_10:%.*]] = add i32 [[TMP87]], [[OP_RDX_9]]
; CHECK-O3-NEXT: [[ADD_PTR_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_9]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_9]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP88:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP88:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP89:%.*]] = zext <16 x i8> [[TMP88]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP90:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP90:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP91:%.*]] = zext <16 x i8> [[TMP90]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP92:%.*]] = sub nsw <16 x i16> [[TMP89]], [[TMP91]]
; CHECK-O3-NEXT: [[TMP93:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP92]], i1 false)
@@ -142,9 +142,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_11:%.*]] = add i32 [[TMP95]], [[OP_RDX_10]]
; CHECK-O3-NEXT: [[ADD_PTR_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_10]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_10]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP96:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP96:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP97:%.*]] = zext <16 x i8> [[TMP96]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP98:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP98:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP99:%.*]] = zext <16 x i8> [[TMP98]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP100:%.*]] = sub nsw <16 x i16> [[TMP97]], [[TMP99]]
; CHECK-O3-NEXT: [[TMP101:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP100]], i1 false)
@@ -153,9 +153,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_12:%.*]] = add i32 [[TMP103]], [[OP_RDX_11]]
; CHECK-O3-NEXT: [[ADD_PTR_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_11]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_11]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP104:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP104:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP105:%.*]] = zext <16 x i8> [[TMP104]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP106:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP106:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP107:%.*]] = zext <16 x i8> [[TMP106]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP108:%.*]] = sub nsw <16 x i16> [[TMP105]], [[TMP107]]
; CHECK-O3-NEXT: [[TMP109:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP108]], i1 false)
@@ -164,9 +164,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_13:%.*]] = add i32 [[TMP111]], [[OP_RDX_12]]
; CHECK-O3-NEXT: [[ADD_PTR_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_12]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_12]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP112:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP112:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP113:%.*]] = zext <16 x i8> [[TMP112]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP114:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP114:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP115:%.*]] = zext <16 x i8> [[TMP114]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP116:%.*]] = sub nsw <16 x i16> [[TMP113]], [[TMP115]]
; CHECK-O3-NEXT: [[TMP117:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP116]], i1 false)
@@ -175,9 +175,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-O3-NEXT: [[OP_RDX_14:%.*]] = add i32 [[TMP119]], [[OP_RDX_13]]
; CHECK-O3-NEXT: [[ADD_PTR_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_13]], i64 [[IDX_EXT]]
; CHECK-O3-NEXT: [[ADD_PTR9_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_13]], i64 [[IDX_EXT8]]
-; CHECK-O3-NEXT: [[TMP120:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP120:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP121:%.*]] = zext <16 x i8> [[TMP120]] to <16 x i16>
-; CHECK-O3-NEXT: [[TMP122:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[TBAA0]]
+; CHECK-O3-NEXT: [[TMP122:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-O3-NEXT: [[TMP123:%.*]] = zext <16 x i8> [[TMP122]] to <16 x i16>
; CHECK-O3-NEXT: [[TMP124:%.*]] = sub nsw <16 x i16> [[TMP121]], [[TMP123]]
; CHECK-O3-NEXT: [[TMP125:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP124]], i1 false)
@@ -191,9 +191,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[ENTRY:.*:]]
; CHECK-LTO-NEXT: [[IDX_EXT8:%.*]] = sext i32 [[S_P2]] to i64
; CHECK-LTO-NEXT: [[IDX_EXT:%.*]] = sext i32 [[S_P1]] to i64
-; CHECK-LTO-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LTO-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-LTO-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP4:%.*]] = sub nsw <16 x i16> [[TMP1]], [[TMP3]]
; CHECK-LTO-NEXT: [[TMP5:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP4]], i1 true)
@@ -201,9 +201,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[TMP44:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP36]])
; CHECK-LTO-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP10:%.*]] = sub nsw <16 x i16> [[TMP7]], [[TMP9]]
; CHECK-LTO-NEXT: [[TMP11:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP10]], i1 true)
@@ -212,9 +212,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_1:%.*]] = add i32 [[TMP60]], [[TMP44]]
; CHECK-LTO-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[TMP12]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP15:%.*]] = zext <16 x i8> [[TMP14]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP16:%.*]] = sub nsw <16 x i16> [[TMP13]], [[TMP15]]
; CHECK-LTO-NEXT: [[TMP17:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP16]], i1 true)
@@ -223,9 +223,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_2:%.*]] = add i32 [[OP_RDX_1]], [[TMP76]]
; CHECK-LTO-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_1]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP20:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP20:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[TMP20]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP22:%.*]] = sub nsw <16 x i16> [[TMP19]], [[TMP21]]
; CHECK-LTO-NEXT: [[TMP23:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP22]], i1 true)
@@ -234,9 +234,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_3:%.*]] = add i32 [[OP_RDX_2]], [[TMP92]]
; CHECK-LTO-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_2]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP28:%.*]] = sub nsw <16 x i16> [[TMP25]], [[TMP27]]
; CHECK-LTO-NEXT: [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP28]], i1 true)
@@ -245,9 +245,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_4:%.*]] = add i32 [[OP_RDX_3]], [[TMP108]]
; CHECK-LTO-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_3]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP34:%.*]] = sub nsw <16 x i16> [[TMP31]], [[TMP33]]
; CHECK-LTO-NEXT: [[TMP35:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP34]], i1 true)
@@ -256,9 +256,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_5:%.*]] = add i32 [[OP_RDX_4]], [[TMP117]]
; CHECK-LTO-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_4]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP37:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP37:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP38:%.*]] = zext <16 x i8> [[TMP37]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP39:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP39:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP41:%.*]] = sub nsw <16 x i16> [[TMP38]], [[TMP40]]
; CHECK-LTO-NEXT: [[TMP42:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP41]], i1 true)
@@ -267,9 +267,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_6:%.*]] = add i32 [[OP_RDX_5]], [[TMP118]]
; CHECK-LTO-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_5]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP45:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP45:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP46:%.*]] = zext <16 x i8> [[TMP45]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP47:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP47:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP49:%.*]] = sub nsw <16 x i16> [[TMP46]], [[TMP48]]
; CHECK-LTO-NEXT: [[TMP50:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP49]], i1 true)
@@ -278,9 +278,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_7:%.*]] = add i32 [[OP_RDX_6]], [[TMP120]]
; CHECK-LTO-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_6]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_6]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP53:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP53:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP54:%.*]] = zext <16 x i8> [[TMP53]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP55:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP55:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP57:%.*]] = sub nsw <16 x i16> [[TMP54]], [[TMP56]]
; CHECK-LTO-NEXT: [[TMP58:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP57]], i1 true)
@@ -289,9 +289,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_8:%.*]] = add i32 [[OP_RDX_7]], [[TMP121]]
; CHECK-LTO-NEXT: [[ADD_PTR_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_7]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_7]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP61:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP61:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP62:%.*]] = zext <16 x i8> [[TMP61]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP63:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP63:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP64:%.*]] = zext <16 x i8> [[TMP63]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP65:%.*]] = sub nsw <16 x i16> [[TMP62]], [[TMP64]]
; CHECK-LTO-NEXT: [[TMP66:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP65]], i1 true)
@@ -300,9 +300,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_9:%.*]] = add i32 [[OP_RDX_8]], [[TMP122]]
; CHECK-LTO-NEXT: [[ADD_PTR_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_8]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_8]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP69:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP69:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP70:%.*]] = zext <16 x i8> [[TMP69]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP71:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP71:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP72:%.*]] = zext <16 x i8> [[TMP71]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP73:%.*]] = sub nsw <16 x i16> [[TMP70]], [[TMP72]]
; CHECK-LTO-NEXT: [[TMP74:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP73]], i1 true)
@@ -311,9 +311,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_10:%.*]] = add i32 [[OP_RDX_9]], [[TMP123]]
; CHECK-LTO-NEXT: [[ADD_PTR_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_9]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_9]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP77:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP77:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP78:%.*]] = zext <16 x i8> [[TMP77]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP79:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP79:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP80:%.*]] = zext <16 x i8> [[TMP79]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP81:%.*]] = sub nsw <16 x i16> [[TMP78]], [[TMP80]]
; CHECK-LTO-NEXT: [[TMP82:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP81]], i1 true)
@@ -322,9 +322,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_11:%.*]] = add i32 [[OP_RDX_10]], [[TMP124]]
; CHECK-LTO-NEXT: [[ADD_PTR_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_10]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_10]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP85:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP85:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP86:%.*]] = zext <16 x i8> [[TMP85]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP87:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP87:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP88:%.*]] = zext <16 x i8> [[TMP87]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP89:%.*]] = sub nsw <16 x i16> [[TMP86]], [[TMP88]]
; CHECK-LTO-NEXT: [[TMP90:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP89]], i1 true)
@@ -333,9 +333,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_12:%.*]] = add i32 [[OP_RDX_11]], [[TMP125]]
; CHECK-LTO-NEXT: [[ADD_PTR_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_11]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_11]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP93:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP93:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP94:%.*]] = zext <16 x i8> [[TMP93]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP95:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP95:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP96:%.*]] = zext <16 x i8> [[TMP95]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP97:%.*]] = sub nsw <16 x i16> [[TMP94]], [[TMP96]]
; CHECK-LTO-NEXT: [[TMP98:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP97]], i1 true)
@@ -344,9 +344,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_13:%.*]] = add i32 [[OP_RDX_12]], [[TMP126]]
; CHECK-LTO-NEXT: [[ADD_PTR_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_12]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_12]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP101:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP101:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP102:%.*]] = zext <16 x i8> [[TMP101]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP103:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP103:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP104:%.*]] = zext <16 x i8> [[TMP103]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP105:%.*]] = sub nsw <16 x i16> [[TMP102]], [[TMP104]]
; CHECK-LTO-NEXT: [[TMP106:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP105]], i1 true)
@@ -355,9 +355,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2,
; CHECK-LTO-NEXT: [[OP_RDX_14:%.*]] = add i32 [[OP_RDX_13]], [[TMP119]]
; CHECK-LTO-NEXT: [[ADD_PTR_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_13]], i64 [[IDX_EXT]]
; CHECK-LTO-NEXT: [[ADD_PTR9_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_13]], i64 [[IDX_EXT8]]
-; CHECK-LTO-NEXT: [[TMP109:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP109:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP110:%.*]] = zext <16 x i8> [[TMP109]] to <16 x i16>
-; CHECK-LTO-NEXT: [[TMP111:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[TBAA0]]
+; CHECK-LTO-NEXT: [[TMP111:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-LTO-NEXT: [[TMP112:%.*]] = zext <16 x i8> [[TMP111]] to <16 x i16>
; CHECK-LTO-NEXT: [[TMP113:%.*]] = sub nsw <16 x i16> [[TMP110]], [[TMP112]]
; CHECK-LTO-NEXT: [[TMP114:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP113]], i1 true)
@@ -489,11 +489,11 @@ attributes #3 = { nounwind }
!13 = !{!"llvm.loop.mustprogress"}
!14 = distinct !{!14, !13}
;.
-; CHECK-O3: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-O3: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK-O3: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
; CHECK-O3: [[META2]] = !{!"Simple C/C++ TBAA"}
;.
-; CHECK-LTO: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-LTO: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK-LTO: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
; CHECK-LTO: [[META2]] = !{!"Simple C/C++ TBAA"}
;.
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
index 5386bf9..13eed2e 100644
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes='default<O2>' -S %s | FileCheck %s
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
@@ -12,134 +12,135 @@ target triple = "systemz"
; that transform to produce optimal asm.
define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define dso_local zeroext i32 @foo(
+; CHECK-SAME: ptr noundef readnone captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: tail call void @populate(ptr noundef nonnull @ARR) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
-; CHECK: for.body4:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ]
-; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD_7:%.*]], %[[FOR_BODY4]] ]
; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[SUM_11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_NEG:%.*]] = xor i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_NEG]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_112_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_112_NEG]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_219_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_219_NEG]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_3_NEG]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_4_NEG]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_5_NEG]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_6_NEG]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: for.body4.1:
-; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
-; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_BODY4_1:.*]], label %[[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_BODY4_1]]:
+; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], %[[FOR_BODY4_1]] ], [ 0, %[[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], %[[FOR_BODY4_1]] ], [ [[ADD_7]], %[[FOR_BODY4]] ]
; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_1]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_NEG:%.*]] = xor i64 [[INDVARS_IV_1]], -1
; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_NEG]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_1_NEG]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_2_NEG]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_3_NEG]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_4_NEG]]
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_5_NEG]]
-; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_6_NEG]]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
-; CHECK: for.body4.2:
-; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
-; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label %[[FOR_BODY4_2:.*]], label %[[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK: [[FOR_BODY4_2]]:
+; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], %[[FOR_BODY4_2]] ], [ 0, %[[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], %[[FOR_BODY4_2]] ], [ [[ADD_1_7]], %[[FOR_BODY4_1]] ]
; CHECK-NEXT: [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_2]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP24]], 3
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_NEG:%.*]] = xor i64 [[INDVARS_IV_2]], -1
; CHECK-NEXT: [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_NEG]]
-; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3
; CHECK-NEXT: [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_1_NEG]]
-; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3
; CHECK-NEXT: [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_2_NEG]]
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3
; CHECK-NEXT: [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_3_NEG]]
-; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3
; CHECK-NEXT: [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_4_NEG]]
-; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3
; CHECK-NEXT: [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_5_NEG]]
-; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3
; CHECK-NEXT: [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_6_NEG]]
-; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[INT_TBAA3]]
; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
-; CHECK: for.inc5.2:
+; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label %[[FOR_INC5_2:.*]], label %[[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
+; CHECK: [[FOR_INC5_2]]:
; CHECK-NEXT: ret i32 [[ADD_2_7]]
;
entry:
@@ -210,3 +211,11 @@ attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}
+;.
+; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
index 7fe3f33..f42101f 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -O3 -S | FileCheck %s
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
@@ -20,27 +20,28 @@ $_ZNSt14__array_traitsIiLm2EE6_S_refERA2_Kim = comdat any
; Function Attrs: mustprogress nounwind uwtable
define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 dereferenceable(8) %arg1) #0 {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG:%.*]], 128
+; CHECK-LABEL: define dso_local void @foo(
+; CHECK-SAME: i32 noundef [[ARG:%.*]], ptr noundef nonnull writeonly align 4 captures(none) dereferenceable(8) [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG]], 128
; CHECK-NEXT: [[I10:%.*]] = shl nsw i32 [[I9]], 7
; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
-; CHECK-NEXT: br i1 [[TMP0]], label [[BB12:%.*]], label [[BB13:%.*]]
-; CHECK: bb12.loopexit:
+; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]]
+; CHECK: [[BB12_LOOPEXIT:.*]]:
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
-; CHECK-NEXT: br label [[BB12]]
-; CHECK: bb12:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], [[BB12_LOOPEXIT:%.*]] ], [ 180388626456, [[BB:%.*]] ]
-; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: br label %[[BB12]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
+; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
; CHECK-NEXT: ret void
-; CHECK: bb13:
-; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], [[BB13]] ], [ 42, [[BB]] ]
-; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], [[BB13]] ], [ 24, [[BB]] ]
-; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], [[BB13]] ], [ 0, [[BB]] ]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ]
+; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ]
+; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ]
; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1
; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
@@ -50,7 +51,7 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer
; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
-; CHECK-NEXT: br i1 [[I11_NOT_3]], label [[BB12_LOOPEXIT]], label [[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
;
bb:
%i = alloca i32, align 4
@@ -166,3 +167,11 @@ attributes #3 = { nounwind }
!14 = !{!7, !7, i64 0}
!15 = !{!16, !16, i64 0}
!16 = !{!"long", !7, i64 0}
+;.
+; CHECK: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+; CHECK: [[META7]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]}
+; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
index 00453e7..7954ff0 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="default<O1>" -S < %s | FileCheck --check-prefixes=O1 %s
; RUN: opt -passes="default<O2>" -S < %s | FileCheck --check-prefixes=O2 %s
; RUN: opt -passes="default<O3>" -S < %s | FileCheck --check-prefixes=O3 %s
@@ -14,125 +14,125 @@ target triple = "x86_64-unknown-linux-gnu"
$_ZNSt6vectorIiSaIiEEixEm = comdat any
define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 dereferenceable(24) %data, i64 noundef %numElems) {
-; O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
-; O1-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O1-NEXT: entry:
+; O1-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(
+; O1-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O1-NEXT: [[ENTRY:.*]]:
; O1-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
-; O1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
-; O1: for.cond1.preheader:
-; O1-NEXT: [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
-; O1-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]]
-; O1: for.cond.cleanup:
+; O1-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; O1: [[FOR_COND1_PREHEADER]]:
+; O1-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
+; O1-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4:.*]]
+; O1: [[FOR_COND_CLEANUP:.*]]:
; O1-NEXT: ret void
-; O1: for.cond.cleanup3:
+; O1: [[FOR_COND_CLEANUP3]]:
; O1-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1
; O1-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
-; O1-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
-; O1: for.body4:
-; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
+; O1-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; O1: [[FOR_BODY4]]:
+; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4]] ], [ 0, %[[FOR_COND1_PREHEADER]] ]
; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
-; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2:![0-9]+]]
+; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
; O1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
-; O1-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2]]
+; O1-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2]]
; O1-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1
; O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]]
-; O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP6:![0-9]+]]
+; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], !llvm.loop [[LOOP6:![0-9]+]]
;
-; O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
-; O2-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O2-NEXT: entry:
+; O2-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(
+; O2-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O2-NEXT: [[ENTRY:.*]]:
; O2-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
; O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
; O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
; O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
; O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]]
-; O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
-; O2: for.cond1.preheader:
-; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
-; O2-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]]
-; O2: for.body4.preheader:
-; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]]
-; O2: vector.body:
-; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ]
+; O2-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; O2: [[FOR_COND1_PREHEADER]]:
+; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
+; O2-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4_PREHEADER:.*]]
+; O2: [[FOR_BODY4_PREHEADER]]:
+; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY4_PREHEADER9:.*]], label %[[VECTOR_BODY:.*]]
+; O2: [[VECTOR_BODY]]:
+; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ]
; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
-; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]]
; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1)
-; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
-; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]]
+; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]]
; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; O2: middle.block:
-; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]]
-; O2: for.body4.preheader9:
-; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; O2-NEXT: br label [[FOR_BODY4:%.*]]
-; O2: for.cond.cleanup:
+; O2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; O2: [[MIDDLE_BLOCK]]:
+; O2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4_PREHEADER9]]
+; O2: [[FOR_BODY4_PREHEADER9]]:
+; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
+; O2-NEXT: br label %[[FOR_BODY4:.*]]
+; O2: [[FOR_COND_CLEANUP:.*]]:
; O2-NEXT: ret void
-; O2: for.cond.cleanup3:
+; O2: [[FOR_COND_CLEANUP3]]:
; O2-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1
; O2-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
-; O2-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]]
-; O2: for.body4:
-; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER9]] ]
+; O2-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]]
+; O2: [[FOR_BODY4]]:
+; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4]] ], [ [[J_05_PH]], %[[FOR_BODY4_PREHEADER9]] ]
; O2-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
-; O2-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]]
+; O2-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]]
; O2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
-; O2-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]]
+; O2-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]]
; O2-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1
; O2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]]
-; O2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]]
+; O2-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]]
;
-; O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
-; O3-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O3-NEXT: entry:
+; O3-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(
+; O3-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O3-NEXT: [[ENTRY:.*:]]
; O3-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
; O3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
-; O3-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
-; O3: for.cond1.preheader.us.preheader:
+; O3-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_COND1_PREHEADER_US_PREHEADER:.*]]
+; O3: [[FOR_COND1_PREHEADER_US_PREHEADER]]:
; O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
; O3-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
; O3-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]]
-; O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
-; O3: for.cond1.preheader.us:
-; O3-NEXT: [[I_06_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]]
-; O3: vector.body:
-; O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
+; O3-NEXT: br label %[[FOR_COND1_PREHEADER_US:.*]]
+; O3: [[FOR_COND1_PREHEADER_US]]:
+; O3-NEXT: [[I_06_US:%.*]] = phi i64 [ [[INC7_US:%.*]], %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_US_PREHEADER]] ]
+; O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY4_US_PREHEADER:.*]], label %[[VECTOR_BODY:.*]]
+; O3: [[VECTOR_BODY]]:
+; O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_COND1_PREHEADER_US]] ]
; O3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
; O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
-; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]]
; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1)
-; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
-; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]]
+; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]]
; O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; O3-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; O3-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; O3: middle.block:
-; O3-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US_PREHEADER]]
-; O3: for.body4.us.preheader:
-; O3-NEXT: [[J_05_US_PH:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; O3-NEXT: br label [[FOR_BODY4_US:%.*]]
-; O3: for.body4.us:
-; O3-NEXT: [[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], [[FOR_BODY4_US]] ], [ [[J_05_US_PH]], [[FOR_BODY4_US_PREHEADER]] ]
+; O3-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; O3: [[MIDDLE_BLOCK]]:
+; O3-NEXT: br i1 [[CMP_N]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US_PREHEADER]]
+; O3: [[FOR_BODY4_US_PREHEADER]]:
+; O3-NEXT: [[J_05_US_PH:%.*]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
+; O3-NEXT: br label %[[FOR_BODY4_US:.*]]
+; O3: [[FOR_BODY4_US]]:
+; O3-NEXT: [[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], %[[FOR_BODY4_US]] ], [ [[J_05_US_PH]], %[[FOR_BODY4_US_PREHEADER]] ]
; O3-NEXT: [[ADD_PTR_I_US:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05_US]]
-; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]]
+; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]]
; O3-NEXT: [[INC_US:%.*]] = add nsw i32 [[TMP6]], 1
-; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]]
+; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]]
; O3-NEXT: [[INC5_US]] = add nuw i64 [[J_05_US]], 1
; O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5_US]], [[NUMELEMS]]
-; O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]]
-; O3: for.cond1.for.cond.cleanup3_crit_edge.us:
+; O3-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]]
+; O3: [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]:
; O3-NEXT: [[INC7_US]] = add nuw nsw i64 [[I_06_US]], 1
; O3-NEXT: [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100
-; O3-NEXT: br i1 [[EXITCOND8_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]]
-; O3: for.cond.cleanup:
+; O3-NEXT: br i1 [[EXITCOND8_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]]
+; O3: [[FOR_COND_CLEANUP]]:
; O3-NEXT: ret void
;
entry:
@@ -237,3 +237,34 @@ declare void @llvm.lifetime.end.p0(ptr nocapture)
!15 = !{!"long", !5, i64 0}
!16 = !{!17, !4, i64 0}
!17 = !{!"_ZTSNSt12_Vector_baseIiSaIiEE17_Vector_impl_dataE", !4, i64 0, !4, i64 8, !4, i64 16}
+;.
+; O1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; O1: [[META1]] = !{!"llvm.loop.mustprogress"}
+; O1: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+; O1: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+; O1: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; O1: [[META5]] = !{!"Simple C++ TBAA"}
+; O1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
+;.
+; O2: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; O2: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; O2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; O2: [[META3]] = !{!"Simple C++ TBAA"}
+; O2: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
+; O2: [[META5]] = !{!"llvm.loop.mustprogress"}
+; O2: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
+; O2: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
+; O2: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]}
+; O2: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]], [[META7]], [[META6]]}
+;.
+; O3: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; O3: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; O3: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; O3: [[META3]] = !{!"Simple C++ TBAA"}
+; O3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
+; O3: [[META5]] = !{!"llvm.loop.mustprogress"}
+; O3: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
+; O3: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
+; O3: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]], [[META7]], [[META6]]}
+; O3: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
index cb37846..ac73651 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -23,18 +23,18 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
-; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -49,18 +49,18 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
-; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
-; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -190,12 +190,12 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: re
!18 = !{!"llvm.loop.vectorize.enable", i1 true}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
; CHECK: [[ACC_GRP4]] = distinct !{}
-; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[DOUBLE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
index ec387d6..fcdb683 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="default<O1>" -S < %s | FileCheck --check-prefixes=O1 %s
; RUN: opt -passes="default<O2>" -S < %s | FileCheck --check-prefixes=O23 %s
; RUN: opt -passes="default<O3>" -S < %s | FileCheck --check-prefixes=O23 %s
@@ -9,57 +9,59 @@ target triple = "x86_64-unknown-linux-gnu"
; We should retain the TBAA on the load here, not lose it.
define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) {
-; O1-LABEL: @licm(
-; O1-NEXT: entry:
-; O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
-; O1: for.body.lr.ph:
-; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
-; O1-NEXT: br label [[FOR_BODY:%.*]]
-; O1: for.body:
-; O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; O1-LABEL: define void @licm(
+; O1-SAME: ptr readonly align 8 captures(none) dereferenceable(8) [[_M_START_I:%.*]], i64 [[NUMELEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O1-NEXT: [[ENTRY:.*:]]
+; O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM]], 0
+; O1-NEXT: br i1 [[CMP1_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; O1: [[FOR_BODY_LR_PH]]:
+; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]]
+; O1-NEXT: br label %[[FOR_BODY:.*]]
+; O1: [[FOR_BODY]]:
+; O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[K_02]]
-; O1-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; O1-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]]
; O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
; O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
-; O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
-; O1: for.cond.cleanup:
+; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+; O1: [[FOR_COND_CLEANUP]]:
; O1-NEXT: ret void
;
-; O23-LABEL: @licm(
-; O23-NEXT: entry:
-; O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
-; O23: for.body.lr.ph:
-; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; O23-LABEL: define void @licm(
+; O23-SAME: ptr readonly align 8 captures(none) dereferenceable(8) [[_M_START_I:%.*]], i64 [[NUMELEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O23-NEXT: [[ENTRY:.*:]]
+; O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM]], 0
+; O23-NEXT: br i1 [[CMP1_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; O23: [[FOR_BODY_LR_PH]]:
+; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]]
; O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4
-; O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
-; O23: vector.ph:
+; O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
+; O23: [[VECTOR_PH]]:
; O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4
-; O23-NEXT: br label [[VECTOR_BODY:%.*]]
-; O23: vector.body:
-; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; O23-NEXT: br label %[[VECTOR_BODY:.*]]
+; O23: [[VECTOR_BODY]]:
+; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]]
; O23-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
-; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]]
-; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]]
+; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]]
+; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[DOUBLE_TBAA8]]
; O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; O23-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; O23-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; O23: middle.block:
+; O23-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; O23: [[MIDDLE_BLOCK]]:
; O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEM]], [[N_VEC]]
-; O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]]
-; O23: for.body.preheader:
-; O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; O23-NEXT: br label [[FOR_BODY:%.*]]
-; O23: for.body:
-; O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ]
+; O23-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER]]
+; O23: [[FOR_BODY_PREHEADER]]:
+; O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
+; O23-NEXT: br label %[[FOR_BODY:.*]]
+; O23: [[FOR_BODY]]:
+; O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[K_02_PH]], %[[FOR_BODY_PREHEADER]] ]
; O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[K_02]]
-; O23-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]]
+; O23-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA8]]
; O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
; O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
-; O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
-; O23: for.cond.cleanup:
+; O23-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; O23: [[FOR_COND_CLEANUP]]:
; O23-NEXT: ret void
;
entry:
@@ -94,3 +96,24 @@ for.cond.cleanup: ; preds = %for.cond
!7 = !{!"Simple C++ TBAA"}
!8 = !{!9, !9, i64 0}
!9 = !{!"double", !6, i64 0}
+;.
+; O1: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META5:![0-9]+]], i64 0}
+; O1: [[META4]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META5]], i64 0, [[META5]], i64 8, [[META5]], i64 16}
+; O1: [[META5]] = !{!"any pointer", [[META6:![0-9]+]], i64 0}
+; O1: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+; O1: [[META7]] = !{!"Simple C++ TBAA"}
+; O1: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; O1: [[META9]] = !{!"double", [[META6]], i64 0}
+;.
+; O23: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META5:![0-9]+]], i64 0}
+; O23: [[META4]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META5]], i64 0, [[META5]], i64 8, [[META5]], i64 16}
+; O23: [[META5]] = !{!"any pointer", [[META6:![0-9]+]], i64 0}
+; O23: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+; O23: [[META7]] = !{!"Simple C++ TBAA"}
+; O23: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; O23: [[META9]] = !{!"double", [[META6]], i64 0}
+; O23: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
+; O23: [[META11]] = !{!"llvm.loop.isvectorized", i32 1}
+; O23: [[META12]] = !{!"llvm.loop.unroll.runtime.disable"}
+; O23: [[LOOP13]] = distinct !{[[LOOP13]], [[META12]], [[META11]]}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
index 438a93c7..574132c 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="default<O1>" -S < %s | FileCheck --check-prefixes=O1 %s
; RUN: opt -passes="default<O2>" -S < %s | FileCheck --check-prefixes=O23 %s
; RUN: opt -passes="default<O3>" -S < %s | FileCheck --check-prefixes=O23 %s
@@ -13,65 +13,65 @@ target triple = "x86_64-unknown-linux-gnu"
$_ZN12FloatVecPair6vecIncEv = comdat any
define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) {
-; O1-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair
-; O1-SAME: (ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O1-NEXT: entry:
+; O1-LABEL: define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(
+; O1-SAME: ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O1-NEXT: [[ENTRY:.*:]]
; O1-NEXT: [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16
-; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
; O1-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
; O1-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8
-; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[INT_TBAA6:![0-9]+]]
; O1-NEXT: [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
-; O1-NEXT: br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
-; O1: for.body7.lr.ph.i:
-; O1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; O1-NEXT: br i1 [[CMP56_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT:.*]], label %[[FOR_BODY7_LR_PH_I:.*]]
+; O1: [[FOR_BODY7_LR_PH_I]]:
+; O1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]]
; O1-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
-; O1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
+; O1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[ANYPTR_TBAA0]]
; O1-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef
-; O1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]]
+; O1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[ANYPTR_TBAA8]]
; O1-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef
-; O1-NEXT: br label [[FOR_BODY7_I:%.*]]
-; O1: for.body7.i:
-; O1-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ]
-; O1-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]]
-; O1-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]]
+; O1-NEXT: br label %[[FOR_BODY7_I:.*]]
+; O1: [[FOR_BODY7_I]]:
+; O1-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, %[[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], %[[FOR_BODY7_I]] ]
+; O1-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[FLOAT_TBAA9:![0-9]+]]
+; O1-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9]]
; O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP5]], [[TMP6]]
-; O1-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]]
+; O1-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9]]
; O1-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1
; O1-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]]
-; O1-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]]
-; O1: _ZN12FloatVecPair6vecIncEv.exit:
+; O1-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label %[[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; O1: [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]]:
; O1-NEXT: ret void
;
-; O23-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair
-; O23-SAME: (ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O23-NEXT: entry:
+; O23-LABEL: define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(
+; O23-SAME: ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O23-NEXT: [[ENTRY:.*:]]
; O23-NEXT: [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16
-; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
; O23-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
; O23-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8
-; O23-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; O23-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[INT_TBAA6:![0-9]+]]
; O23-NEXT: [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
-; O23-NEXT: br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
-; O23: for.body7.lr.ph.i:
-; O23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; O23-NEXT: br i1 [[CMP56_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT:.*]], label %[[FOR_BODY7_LR_PH_I:.*]]
+; O23: [[FOR_BODY7_LR_PH_I]]:
+; O23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]]
; O23-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
-; O23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
+; O23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[ANYPTR_TBAA0]]
; O23-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef
-; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]]
+; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[ANYPTR_TBAA8]]
; O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef
-; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]]
-; O23-NEXT: br label [[FOR_BODY7_I:%.*]]
-; O23: for.body7.i:
-; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ]
-; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ]
-; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]]
+; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9:![0-9]+]]
+; O23-NEXT: br label %[[FOR_BODY7_I:.*]]
+; O23: [[FOR_BODY7_I]]:
+; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], %[[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], %[[FOR_BODY7_I]] ]
+; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, %[[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], %[[FOR_BODY7_I]] ]
+; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[FLOAT_TBAA9]]
; O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]]
-; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]]
+; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9]]
; O23-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1
; O23-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]]
-; O23-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]]
-; O23: _ZN12FloatVecPair6vecIncEv.exit:
+; O23-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label %[[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; O23: [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]]:
; O23-NEXT: ret void
;
entry:
@@ -163,3 +163,32 @@ entry:
!12 = !{!13, !1, i64 0}
!13 = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", !1, i64 0, !5, i64 8}
!14 = !{!7, !1, i64 0}
+;.
+; O1: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; O1: [[META1]] = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", [[META2]], i64 0, [[META5:![0-9]+]], i64 8}
+; O1: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
+; O1: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; O1: [[META4]] = !{!"Simple C++ TBAA"}
+; O1: [[META5]] = !{!"int", [[META3]], i64 0}
+; O1: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META5]], i64 8}
+; O1: [[META7]] = !{!"_ZTS14HomemadeVectorIfLj8EE", [[META2]], i64 0, [[META5]], i64 8}
+; O1: [[ANYPTR_TBAA8]] = !{[[META7]], [[META2]], i64 0}
+; O1: [[FLOAT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; O1: [[META10]] = !{!"float", [[META3]], i64 0}
+; O1: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
+; O1: [[META12]] = !{!"llvm.loop.mustprogress"}
+;.
+; O23: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; O23: [[META1]] = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", [[META2]], i64 0, [[META5:![0-9]+]], i64 8}
+; O23: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
+; O23: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; O23: [[META4]] = !{!"Simple C++ TBAA"}
+; O23: [[META5]] = !{!"int", [[META3]], i64 0}
+; O23: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META5]], i64 8}
+; O23: [[META7]] = !{!"_ZTS14HomemadeVectorIfLj8EE", [[META2]], i64 0, [[META5]], i64 8}
+; O23: [[ANYPTR_TBAA8]] = !{[[META7]], [[META2]], i64 0}
+; O23: [[FLOAT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; O23: [[META10]] = !{!"float", [[META3]], i64 0}
+; O23: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
+; O23: [[META12]] = !{!"llvm.loop.mustprogress"}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
index 69a46b2..ae6f4a7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -O3 -S | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -15,22 +15,23 @@ target triple = "x86_64-apple-macosx11.0.0"
; }
define void @vdiv(ptr %a, float %b) #0 {
-; CHECK-LABEL: @vdiv(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-LABEL: define void @vdiv(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], float [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA3:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[TMP0]]
-; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
;
entry:
@@ -97,3 +98,14 @@ attributes #2 = { nounwind }
!11 = distinct !{!11, !12, !13}
!12 = !{!"llvm.loop.mustprogress"}
!13 = !{!"llvm.loop.unroll.disable"}
+;.
+; CHECK: [[FLOAT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"float", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META9]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 7817c23..f7bc01e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -O3 -S | FileCheck %s
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
@@ -12,41 +12,42 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
target triple = "x86_64-apple-macosx10.15.0"
define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
-; CHECK-LABEL: @vdiv(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; CHECK: iter.check:
-; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X:%.*]] to i64
-; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y:%.*]] to i64
+; CHECK-LABEL: define void @vdiv(
+; CHECK-SAME: ptr writeonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], double [[A:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y]] to i64
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
-; CHECK: vector.ph:
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]]
+; CHECK: [[VECTOR_PH1]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 96
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[DOUBLE_TBAA3:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[DOUBLE_TBAA3]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[DOUBLE_TBAA3]]
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]]
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]]
@@ -55,67 +56,67 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 32
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 64
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 96
-; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[DOUBLE_TBAA3]]
+; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[DOUBLE_TBAA3]]
+; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[DOUBLE_TBAA3]]
+; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
-; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY_PREHEADER9]], label [[VEC_EPILOG_PH]]
-; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER9]], label %[[VEC_EPILOG_PH]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP38:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT15]]
-; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
-; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX12]]
-; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP40:%.*]] = fmul fast <4 x double> [[WIDE_LOAD13]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX12]]
-; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]]
-; CHECK-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9]]
-; CHECK: for.body.preheader:
-; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9]]
+; CHECK: [[FOR_BODY_PREHEADER9]]:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
-; CHECK: for.body.prol.preheader:
+; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], label %[[FOR_BODY_PROL_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PROL_PREHEADER]]:
; CHECK-NEXT: [[TMP18:%.*]] = fdiv fast double 1.000000e+00, [[A]]
-; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]]
-; CHECK: for.body.prol:
-; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]]
+; CHECK: [[FOR_BODY_PROL]]:
+; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PROL_PREHEADER]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ], [ 0, %[[FOR_BODY_PROL_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_PROL]]
-; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul fast double [[T0_PROL]], [[TMP18]]
; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_PROL]]
-; CHECK-NEXT: store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK: for.body.prol.loopexit:
-; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8
-; CHECK-NEXT: br i1 [[TMP21]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9_NEW:%.*]]
-; CHECK: for.body.preheader.new:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9_NEW:.*]]
+; CHECK: [[FOR_BODY_PREHEADER9_NEW]]:
; CHECK-NEXT: [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]]
@@ -124,60 +125,60 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP31:%.*]] = fmul fast double [[T0_1]], [[TMP23]]
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP32:%.*]] = fmul fast double [[T0_2]], [[TMP24]]
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP33:%.*]] = fmul fast double [[T0_3]], [[TMP25]]
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0_4]], [[TMP26]]
; CHECK-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_5]], [[TMP27]]
; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_6]], [[TMP28]]
; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_7]], [[TMP29]]
; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
-; CHECK: for.end:
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -223,3 +224,16 @@ attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[DOUBLE_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"double", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
+; CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META8]]}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
index ae0e591..5253c42 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes='default<O2>' -S %s | FileCheck %s
; Slightly reduced test case for a loop iterating over a std::span with libc++ hardening.
@@ -18,30 +18,30 @@
%"struct.std::__1::__bounded_iter" = type { ptr, ptr, ptr }
define void @test_fill_with_foreach([2 x i64] %elems.coerce) {
-; CHECK-LABEL: define void @test_fill_with_foreach
-; CHECK-SAME: ([2 x i64] [[ELEMS_COERCE:%.*]]) local_unnamed_addr {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @test_fill_with_foreach(
+; CHECK-SAME: [2 x i64] [[ELEMS_COERCE:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELEMS_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 0
; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ELEMS_COERCE_FCA_0_EXTRACT]] to ptr
; CHECK-NEXT: [[ELEMS_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 1
; CHECK-NEXT: [[ADD_PTR_I_IDX:%.*]] = shl nsw i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 2
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[ADD_PTR_I_IDX]]
; CHECK-NEXT: [[CMP_NOT_I_I_I_I:%.*]] = icmp slt i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label [[ERROR:%.*]], label [[FOR_COND_PREHEADER_SPLIT:%.*]]
-; CHECK: for.cond.preheader.split:
+; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label %[[ERROR:.*]], label %[[FOR_COND_PREHEADER_SPLIT:.*]]
+; CHECK: [[FOR_COND_PREHEADER_SPLIT]]:
; CHECK-NEXT: [[CMP_I_NOT2:%.*]] = icmp eq i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0
-; CHECK-NEXT: br i1 [[CMP_I_NOT2]], label [[COMMON_RET:%.*]], label [[FOR_BODY:%.*]]
-; CHECK: common.ret:
+; CHECK-NEXT: br i1 [[CMP_I_NOT2]], label %[[COMMON_RET:.*]], label %[[FOR_BODY:.*]]
+; CHECK: [[COMMON_RET]]:
; CHECK-NEXT: ret void
-; CHECK: error:
+; CHECK: [[ERROR]]:
; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: br label [[COMMON_RET]]
-; CHECK: for.body:
-; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ]
+; CHECK-NEXT: br label %[[COMMON_RET]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[FOR_BODY]] ], [ [[TMP0]], %[[FOR_COND_PREHEADER_SPLIT]] ]
; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_03]])
; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__BEGIN1_SROA_0_03]], i64 4
; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR_I]], [[ADD_PTR_I]]
-; CHECK-NEXT: br i1 [[CMP_I_NOT]], label [[COMMON_RET]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[CMP_I_NOT]], label %[[COMMON_RET]], label %[[FOR_BODY]]
;
entry:
%elems = alloca %"class.std::__1::span", align 8
@@ -131,29 +131,29 @@ declare void @llvm.lifetime.end.p0(ptr nocapture)
%Vector_impl_data = type { ptr, ptr, ptr }
define void @foo(ptr noundef nonnull align 8 dereferenceable(24) noalias %vec) #0 {
-; CHECK-LABEL: define void @foo
-; CHECK-SAME: (ptr noalias noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr noalias noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]]
; CHECK-NEXT: [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
; CHECK-NEXT: [[SUB_PTR_RHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[SUB_PTR_SUB_I_I:%.*]] = sub i64 [[SUB_PTR_LHS_CAST_I_I]], [[SUB_PTR_RHS_CAST_I_I]]
; CHECK-NEXT: [[SUB_PTR_DIV_I_I:%.*]] = ashr exact i64 [[SUB_PTR_SUB_I_I]], 3
; CHECK-NEXT: [[CMP_NOT9:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[CMP_NOT9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP_NOT9]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 [[I_010]]
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_010]], 1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[SUB_PTR_DIV_I_I]]
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
entry:
%vec.addr = alloca ptr, align 8
@@ -270,29 +270,29 @@ declare void @abort()
; https://github.com/llvm/llvm-project/issues/63126
define void @loop_with_signed_induction(ptr noundef nonnull align 8 dereferenceable(24) %vec) {
-; CHECK-LABEL: define void @loop_with_signed_induction
-; CHECK-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @loop_with_signed_induction(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[ANYPTR_TBAA5]]
; CHECK-NEXT: [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
; CHECK-NEXT: [[SUB_PTR_RHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[SUB_PTR_SUB_I_I:%.*]] = sub i64 [[SUB_PTR_LHS_CAST_I_I]], [[SUB_PTR_RHS_CAST_I_I]]
; CHECK-NEXT: [[SUB_PTR_DIV_I_I:%.*]] = ashr exact i64 [[SUB_PTR_SUB_I_I]], 3
; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i64 [[SUB_PTR_DIV_I_I]], 0
-; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 [[I_010]]
-; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA6:![0-9]+]]
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
-; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA6]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_010]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[SUB_PTR_DIV_I_I]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]]
;
entry:
%vec.addr = alloca ptr, align 8
@@ -343,22 +343,22 @@ for.end:
define void @monkey(ptr noundef %arr, i32 noundef %len) {
-; CHECK-LABEL: define void @monkey
-; CHECK-SAME: (ptr noundef captures(none) [[ARR:%.*]], i32 noundef [[LEN:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @monkey(
+; CHECK-SAME: ptr noundef captures(none) [[ARR:%.*]], i32 noundef [[LEN:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN]], 1
-; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY4_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.body4.preheader:
-; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY4_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_BODY4_PREHEADER]]:
+; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
-; CHECK: for.cond.cleanup3:
+; CHECK: [[FOR_COND_CLEANUP3]]:
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_09]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[LEN]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY4_PREHEADER]], label [[FOR_COND_CLEANUP]]
-; CHECK: for.body4:
-; CHECK-NEXT: [[K_07:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY4]] ], [ [[I_09]], [[FOR_BODY4_PREHEADER]] ]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY4_PREHEADER]], label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[K_07:%.*]] = phi i32 [ [[DEC:%.*]], %[[FOR_BODY4]] ], [ [[I_09]], %[[FOR_BODY4_PREHEADER]] ]
; CHECK-NEXT: [[IDX_EXT_I:%.*]] = zext i32 [[K_07]] to i64
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDX_EXT_I]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4
@@ -366,7 +366,7 @@ define void @monkey(ptr noundef %arr, i32 noundef %len) {
; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_PTR_I]], align 4
; CHECK-NEXT: [[DEC]] = add i32 [[K_07]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]]
;
entry:
%arr.addr = alloca ptr, align 8
@@ -472,3 +472,13 @@ if.end: ; preds = %entry
!7 = !{!1, !2, i64 8}
!8 = !{!9, !9, i64 0}
!9 = !{!"double", !3, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 8}
+; CHECK: [[META1]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META2]], i64 0, [[META2]], i64 8, [[META2]], i64 16}
+; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA5]] = !{[[META1]], [[META2]], i64 0}
+; CHECK: [[DOUBLE_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+; CHECK: [[META7]] = !{!"double", [[META3]], i64 0}
+;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index aaca5a6..1a1fe20 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
;.
@@ -131,7 +131,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[DOUBLE_TBAA0:![0-9]+]]
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5
@@ -216,7 +216,7 @@ define void @memset_pattern_i64_x_fromnonconstptr(ptr %a, i64 %x, ptr %p) nounwi
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll
index 5d91e03..bfa18f8 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -7,13 +7,13 @@ target triple = "aarch64-unknown-linux-gnu"
%S = type { i8, i8, i8, i8 }
define ptr @foo(ptr %this, ptr %rhs) {
-; CHECK-LABEL: define ptr @foo
-; CHECK-SAME: (ptr [[THIS:%.*]], ptr [[RHS:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[RHS]], align 1, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[THIS]], align 1, !tbaa [[TBAA0]]
+; CHECK-LABEL: define ptr @foo(
+; CHECK-SAME: ptr [[THIS:%.*]], ptr [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[RHS]], align 1, !tbaa [[BOOL_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[THIS]], align 1, !tbaa [[BOOL_TBAA0]]
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[TMP0]], [[TMP1]]
-; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[THIS]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[THIS]], align 1, !tbaa [[BOOL_TBAA0]]
; CHECK-NEXT: ret ptr [[THIS]]
;
entry:
@@ -54,3 +54,9 @@ entry:
!14 = !{!7, !8, i64 2}
!15 = !{!7, !8, i64 3}
+;.
+; CHECK: [[BOOL_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"bool", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
index 9cb2badc2..76b1d18 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; Debug informations shouldn't affect spill cost.
; RUN: opt -S -passes=slp-vectorizer %s -o - | FileCheck %s
@@ -7,17 +7,18 @@ target triple = "aarch64"
%struct.S = type { i64, i64 }
define void @patatino(i64 %n, i64 %i, ptr %p) !dbg !7 {
-; CHECK-LABEL: @patatino(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: #dbg_value(i64 [[N:%.*]], [[META18:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
-; CHECK-NEXT: #dbg_value(i64 [[I:%.*]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]])
-; CHECK-NEXT: #dbg_value(ptr [[P:%.*]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]])
+; CHECK-LABEL: define void @patatino(
+; CHECK-SAME: i64 [[N:%.*]], i64 [[I:%.*]], ptr [[P:%.*]]) !dbg [[DBG7:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: #dbg_value(i64 [[N]], [[META18:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
+; CHECK-NEXT: #dbg_value(i64 [[I]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]])
+; CHECK-NEXT: #dbg_value(ptr [[P]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]])
; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]]
; CHECK-NEXT: #dbg_value(i64 poison, [[META21:![0-9]+]], !DIExpression(), [[META27:![0-9]+]])
; CHECK-NEXT: #dbg_value(i64 poison, [[META22:![0-9]+]], !DIExpression(), [[META28:![0-9]+]])
; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 [[I]], i32 0, !dbg [[DBG29:![0-9]+]]
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]]
-; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA30]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[LONG_TBAA30:![0-9]+]]
+; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[LONG_TBAA30]]
; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll
index e32e5f8..2b6a414 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -7,28 +7,29 @@ target triple = "aarch64"
%struct.node = type { i64, i64, ptr, ptr }
define void @copy(ptr nocapture noundef writeonly %x, ptr nocapture noundef readonly %y, i32 noundef %n) {
-; CHECK-LABEL: @copy(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: for.body.preheader:
+; CHECK-LABEL: define void @copy(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP34]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.cond.cleanup:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[Y:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA0]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA0:![0-9]+]]
+; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA0]]
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[Y]], i64 [[INDVARS_IV]], i32 2
; CHECK-NEXT: [[C13:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X]], i64 [[INDVARS_IV]], i32 2
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[C]], align 8, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[C13]], align 8, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[C]], align 8, !tbaa [[ANYPTR_TBAA4:![0-9]+]]
+; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[C13]], align 8, !tbaa [[ANYPTR_TBAA4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
entry:
%cmp34 = icmp sgt i32 %n, 0
@@ -74,3 +75,11 @@ for.body:
!11 = !{!5, !9, i64 16}
!12 = !{!5, !9, i64 24}
+;.
+; CHECK: [[LONG_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"long", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[ANYPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"any pointer", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll
index 85b8157..541e761 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z16 -S -passes=slp-vectorizer \
; RUN: -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
@@ -147,8 +147,8 @@ define void @fun3(ptr %0) {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 48
; CHECK-NEXT: br label %[[BB5:.*]]
; CHECK: [[BB5]]:
-; CHECK-NEXT: store ptr null, ptr [[TMP3]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr inttoptr (i64 64 to ptr), align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: store ptr null, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr inttoptr (i64 64 to ptr), align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]]
; CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 [[TMP0]](ptr noundef poison, i64 noundef poison)
; CHECK-NEXT: br label %[[BB5]]
@@ -177,7 +177,7 @@ define void @fun3(ptr %0) {
!9 = !{!10, !7, i64 64}
!10 = !{!"node", !6, i64 0, !3, i64 8, !7, i64 16, !7, i64 24, !7, i64 32, !7, i64 40, !7, i64 48, !7, i64 56, !7, i64 64, !7, i64 72, !6, i64 80, !6, i64 88, !3, i64 96, !3, i64 100}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META6:![0-9]+]], i64 40}
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META6:![0-9]+]], i64 40}
; CHECK: [[META1]] = !{!"arc", [[META2:![0-9]+]], i64 0, [[META5:![0-9]+]], i64 8, [[META6]], i64 16, [[META6]], i64 24, [[META7:![0-9]+]], i64 32, [[META6]], i64 40, [[META6]], i64 48, [[META5]], i64 56, [[META5]], i64 64}
; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
@@ -185,6 +185,6 @@ define void @fun3(ptr %0) {
; CHECK: [[META5]] = !{!"long", [[META3]], i64 0}
; CHECK: [[META6]] = !{!"any pointer", [[META3]], i64 0}
; CHECK: [[META7]] = !{!"short", [[META3]], i64 0}
-; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META6]], i64 64}
+; CHECK: [[ANYPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META6]], i64 64}
; CHECK: [[META9]] = !{!"node", [[META5]], i64 0, [[META2]], i64 8, [[META6]], i64 16, [[META6]], i64 24, [[META6]], i64 32, [[META6]], i64 40, [[META6]], i64 48, [[META6]], i64 56, [[META6]], i64 64, [[META6]], i64 72, [[META5]], i64 80, [[META5]], i64 88, [[META2]], i64 96, [[META2]], i64 100}
;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 95ae544..6f05210 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin13.3.0"
define void @_foo(double %p1, double %p2, double %p3) #0 {
-; CHECK-LABEL: @_foo(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @_foo(
+; CHECK-SAME: double [[P1:%.*]], double [[P2:%.*]], double [[P3:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TAB1:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04)
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, %[[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], %[[BB1]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
-; CHECK: return:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[RETURN:.*]], label %[[FOR_BODY]]
+; CHECK: [[RETURN]]:
; CHECK-NEXT: ret void
;
entry:
@@ -78,3 +79,9 @@ declare i32 @_xfn(<2 x double>) #4
!4 = !{!3, !3, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 1e31772..2d9e1f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin13.3.0"
define void @_foo(double %p1, double %p2, double %p3) #0 {
-; CHECK-LABEL: @_foo(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @_foo(
+; CHECK-SAME: double [[P1:%.*]], double [[P2:%.*]], double [[P3:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TAB1:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04)
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, %[[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], %[[BB1]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
-; CHECK: return:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[RETURN:.*]], label %[[FOR_BODY]]
+; CHECK: [[RETURN]]:
; CHECK-NEXT: ret void
;
entry:
@@ -78,3 +79,9 @@ declare i32 @_xfn(<2 x double>) #4
!4 = !{!3, !3, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
index c4bdfa8..635ec32 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -1,16 +1,17 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
define void @test1(ptr %a, ptr %b, ptr %c) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[C:%.*]], align 8, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8, !tbaa [[DOUBLE_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8, !tbaa [[DOUBLE_TBAA0]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]], !fpmath [[META4:![0-9]+]]
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[C]], align 8, !tbaa [[DOUBLE_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -29,12 +30,13 @@ entry:
}
define void @test2(ptr %a, ptr %b, ptr %e) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[E:%.*]], align 8, !tbaa [[TBAA0]]
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[E:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8, !tbaa [[DOUBLE_TBAA0]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8, !tbaa [[DOUBLE_TBAA0]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]], !fpmath [[META5:![0-9]+]]
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[E]], align 8, !tbaa [[DOUBLE_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -52,10 +54,16 @@ entry:
ret void
}
-;CHECK-DAG: !4 = !{float 5.000000e+00}
-;CHECK-DAG: !5 = !{float 2.500000e+00}
!0 = !{ float 5.0 }
!1 = !{ float 2.5 }
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"omnipotent char", !2}
!4 = !{!"double", !3}
+;.
+; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META4]] = !{float 5.000000e+00}
+; CHECK: [[META5]] = !{float 2.500000e+00}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index ff4ef60..1b76ee9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386--netbsd"
@@ -7,19 +7,20 @@ target triple = "i386--netbsd"
; Function Attrs: noreturn nounwind readonly
define i32 @fn1() #0 {
-; CHECK-LABEL: @fn1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a, align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-LABEL: define i32 @fn1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a, align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT: br label [[DO_BODY:%.*]]
-; CHECK: do.body:
-; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[TMP1]], [[ENTRY]] ], [ [[ADD:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA4]]
+; CHECK-NEXT: br label %[[DO_BODY:.*]]
+; CHECK: [[DO_BODY]]:
+; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[TMP2]], %[[ENTRY]] ], [ [[ADD2:%.*]], %[[DO_BODY]] ]
+; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[TMP1]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[DO_BODY]] ]
; CHECK-NEXT: [[ADD]] = add nsw i32 [[B_0]], [[C_0]]
; CHECK-NEXT: [[ADD2]] = add nsw i32 [[ADD]], 1
-; CHECK-NEXT: br label [[DO_BODY]]
+; CHECK-NEXT: br label %[[DO_BODY]]
;
entry:
%0 = load ptr, ptr @a, align 4, !tbaa !4
@@ -44,3 +45,11 @@ attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "frame
!3 = !{!"int", !1}
!4 = !{!0, !0, i64 0}
!5 = !{!3, !3, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]]}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
index 6fd2de8a..618c316 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
@@ -1,15 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-- -mcpu=corei7 < %s | FileCheck %s
define void @test1(float %a, float %b, float %c, float %d, ptr nocapture %p) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i32 3
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]], ptr captures(none) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -28,14 +29,15 @@ entry:
}
define void @test1_vec(float %a, float %b, float %c, float %d, ptr nocapture %p) {
-; CHECK-LABEL: @test1_vec(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i32 3
+; CHECK-LABEL: define void @test1_vec(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]], ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 16, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -52,14 +54,15 @@ entry:
}
define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i32 3
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 1)
-; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -78,13 +81,14 @@ entry:
}
define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) {
-; CHECK-LABEL: @test2_vec(
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i32 3
+; CHECK-LABEL: define void @test2_vec(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], ptr captures(none) [[TMP4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], splat (i32 1)
-; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4]], align 16, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: ret void
;
%6 = add nsw i32 %0, 1
@@ -103,3 +107,9 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) {
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
index 9e4f10e..9c8ba07 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -slp-threshold=-1 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s
@@ -6,15 +6,16 @@
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s
define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
-; CHECK-LABEL: @store_i32(
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0
+; CHECK-LABEL: define void @store_i32(
+; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15)
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], splat (i32 255)
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> splat (i32 255)
-; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]]
; CHECK-NEXT: ret void
;
%4 = load i32, ptr %0, align 4, !tbaa !2
@@ -48,17 +49,18 @@ define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
}
define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
-; CHECK-LABEL: @store_i8(
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-LABEL: define void @store_i8(
+; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1, !tbaa [[CHAR_TBAA4:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15)
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], splat (i32 255)
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> splat (i32 255)
; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
-; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]]
+; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP0]], align 1, !tbaa [[CHAR_TBAA4]]
; CHECK-NEXT: ret void
;
%4 = load i8, ptr %0, align 1, !tbaa !6
@@ -100,9 +102,10 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
}
define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
-; CHECK-LABEL: @store_i64(
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-LABEL: define void @store_i64(
+; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8, !tbaa [[LONG_TBAA5:![0-9]+]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
@@ -111,7 +114,7 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255)
; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295)
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255)
-; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[LONG_TBAA5]]
; CHECK-NEXT: ret void
;
%4 = zext i32 %1 to i64
@@ -160,3 +163,12 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
!6 = !{!4, !4, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"long", !4, i64 0}
+;.
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0}
+; CHECK: [[LONG_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"long", [[META2]], i64 0}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
index db38a62..fde76f8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
@@ -6,99 +6,104 @@
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load(
-; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-LABEL: define void @gather_load(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; SSE-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load(
-; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX-LABEL: define void @gather_load(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load(
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX2-LABEL: define void @gather_load(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX2-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX512F-LABEL: define void @gather_load(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX512F-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX512F-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX512VL-LABEL: define void @gather_load(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX512VL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX512VL-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = getelementptr inbounds i32, ptr %1, i64 1
@@ -121,78 +126,83 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
}
define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_2(
-; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_2(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
-; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
-; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
-; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_2(
-; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_2(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1
; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2
; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3
; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_2(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2
; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3
; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_2(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 9>
; AVX512F-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 3, i32 4, i32 2>
; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_2(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 9>
; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 3, i32 4, i32 2>
; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = getelementptr inbounds i32, ptr %1, i64 1
@@ -219,63 +229,65 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_3(
-; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_3(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
-; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
-; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
-; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
-; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
-; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5
-; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
-; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 6
-; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
-; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7
-; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
-; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
-; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_3(
-; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_3(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
-; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
-; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
-; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
-; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
-; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0
; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i32 2
@@ -285,31 +297,34 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i32 6
; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i32 7
; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_3(
-; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_3(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_3(
-; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_3(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512F-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_3(
-; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_3(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512VL-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = load i32, ptr %1, align 4, !tbaa !2
@@ -354,9 +369,10 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
}
define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
-; SSE-LABEL: @gather_load_4(
-; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1
-; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; SSE-LABEL: define void @gather_load_4(
+; SSE-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 1
+; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11
; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2
; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3
@@ -369,14 +385,14 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7
; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
-; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
@@ -385,32 +401,33 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
-; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_4(
-; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX-LABEL: define void @gather_load_4(
+; AVX-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11
; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
-; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i32 2
@@ -420,31 +437,34 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i32 6
; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i32 7
; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_4(
+; AVX2-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_4(
-; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_4(
+; AVX512F-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512F-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_4(
-; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_4(
+; AVX512VL-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512VL-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%t5 = getelementptr inbounds i32, ptr %t0, i64 1
@@ -494,17 +514,18 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_div(
-; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_div(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4
-; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
+; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
@@ -516,23 +537,23 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i32 3
; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
-; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i32 0
; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i32 1
; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i32 2
@@ -542,35 +563,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i32 2
; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i32 3
; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]]
-; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_div(
-; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_div(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -590,35 +612,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6
; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7
; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
-; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_div(
-; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_div(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -638,27 +661,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6
; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7
; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
-; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_div(
-; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_div(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> <i32 0, i32 3, i32 4, i32 5, i32 8, i32 10, i32 11, i32 13, i32 14, i32 17, i32 20, i32 23, i32 27, i32 30, i32 33, i32 44>
; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 0, i32 3, i32 5, i32 8, i32 10, i32 14, i32 17, i32 20>
; AVX512F-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 4, i32 11, i32 27, i32 30, i32 13, i32 44, i32 33, i32 23>
; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
; AVX512F-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_div(
-; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_div(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> <i32 0, i32 3, i32 4, i32 5, i32 8, i32 10, i32 11, i32 13, i32 14, i32 17, i32 20, i32 23, i32 27, i32 30, i32 33, i32 44>
; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 0, i32 3, i32 5, i32 8, i32 10, i32 14, i32 17, i32 20>
; AVX512VL-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 4, i32 11, i32 27, i32 30, i32 13, i32 44, i32 33, i32 23>
; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
; AVX512VL-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = load float, ptr %1, align 4, !tbaa !2
@@ -722,3 +747,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
!3 = !{!"short", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
+;.
+; SSE: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; SSE: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; SSE: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; SSE: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX2: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX2: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX2: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX512F: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX512F: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX512F: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX512F: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX512VL: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX512VL: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX512VL: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX512VL: [[META3]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
index bfa3610..cf380f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
@@ -6,99 +6,104 @@
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load(
-; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-LABEL: define void @gather_load(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SSE-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load(
-; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX-LABEL: define void @gather_load(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load(
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX2-LABEL: define void @gather_load(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX2-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX512F-LABEL: define void @gather_load(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX512F-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX512F-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; AVX512VL-LABEL: define void @gather_load(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]]
+; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX512VL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1
; AVX512VL-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2
; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3
; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = getelementptr inbounds i32, ptr %1, i64 1
@@ -121,78 +126,83 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
}
define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_2(
-; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_2(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
-; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
-; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
-; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_2(
-; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_2(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1
; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2
; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3
; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_2(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
-; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
-; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
-; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2
; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3
; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_2(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 9>
; AVX512F-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 3, i32 4, i32 2>
; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
-; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_2(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
+; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <10 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 9>
; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 3, i32 4, i32 2>
; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = getelementptr inbounds i32, ptr %1, i64 1
@@ -219,63 +229,65 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_3(
-; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_3(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
-; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
-; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
-; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
-; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
-; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
-; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5
-; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
-; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 6
-; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
-; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7
-; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
-; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
-; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_3(
-; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_3(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
-; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
-; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
-; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
-; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
-; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
-; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0
; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i32 2
@@ -285,31 +297,34 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i32 6
; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i32 7
; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_3(
-; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_3(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_3(
-; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_3(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512F-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_3(
-; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_3(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512VL-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = load i32, ptr %1, align 4, !tbaa !2
@@ -354,9 +369,10 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
}
define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
-; SSE-LABEL: @gather_load_4(
-; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1
-; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; SSE-LABEL: define void @gather_load_4(
+; SSE-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 1
+; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11
; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2
; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3
@@ -369,14 +385,14 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7
; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
-; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
@@ -385,32 +401,33 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
-; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_4(
-; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX-LABEL: define void @gather_load_4(
+; AVX-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11
; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
-; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i32 2
@@ -420,31 +437,34 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i32 6
; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i32 7
; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_4(
+; AVX2-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_4(
-; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_4(
+; AVX512F-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512F-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_4(
-; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_4(
+; AVX512VL-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true>, <22 x i32> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> <i32 0, i32 4, i32 6, i32 9, i32 11, i32 15, i32 18, i32 21>
; AVX512VL-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], <i32 1, i32 3, i32 3, i32 2, i32 2, i32 4, i32 1, i32 4>
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%t5 = getelementptr inbounds i32, ptr %t0, i64 1
@@ -494,17 +514,18 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
-; SSE-LABEL: @gather_load_div(
-; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-LABEL: define void @gather_load_div(
+; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4
-; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
+; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
@@ -516,23 +537,23 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i32 3
; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
-; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i32 0
; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i32 1
; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i32 2
@@ -542,35 +563,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i32 2
; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i32 3
; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]]
-; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; SSE-NEXT: ret void
;
-; AVX-LABEL: @gather_load_div(
-; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-LABEL: define void @gather_load_div(
+; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -590,35 +612,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6
; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7
; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
-; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX-NEXT: ret void
;
-; AVX2-LABEL: @gather_load_div(
-; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-LABEL: define void @gather_load_div(
+; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
-; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
-; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
-; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
-; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
-; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
-; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
-; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
-; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
-; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]]
+; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -638,27 +661,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6
; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7
; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
-; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX2-NEXT: ret void
;
-; AVX512F-LABEL: @gather_load_div(
-; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-LABEL: define void @gather_load_div(
+; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> <i32 0, i32 3, i32 4, i32 5, i32 8, i32 10, i32 11, i32 13, i32 14, i32 17, i32 20, i32 23, i32 27, i32 30, i32 33, i32 44>
; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 0, i32 3, i32 5, i32 8, i32 10, i32 14, i32 17, i32 20>
; AVX512F-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 4, i32 11, i32 27, i32 30, i32 13, i32 44, i32 33, i32 23>
; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
; AVX512F-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512F-NEXT: ret void
;
-; AVX512VL-LABEL: @gather_load_div(
-; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-LABEL: define void @gather_load_div(
+; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] {
+; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <45 x float> poison), !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> <i32 0, i32 3, i32 4, i32 5, i32 8, i32 10, i32 11, i32 13, i32 14, i32 17, i32 20, i32 23, i32 27, i32 30, i32 33, i32 44>
; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 0, i32 3, i32 5, i32 8, i32 10, i32 14, i32 17, i32 20>
; AVX512VL-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> <i32 4, i32 11, i32 27, i32 30, i32 13, i32 44, i32 33, i32 23>
; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
; AVX512VL-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 6, i32 3, i32 2, i32 7>
-; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]]
; AVX512VL-NEXT: ret void
;
%3 = load float, ptr %1, align 4, !tbaa !2
@@ -722,3 +747,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
!3 = !{!"short", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
+;.
+; SSE: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; SSE: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; SSE: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; SSE: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX2: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX2: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX2: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX512F: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX512F: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX512F: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX512F: [[META3]] = !{!"Simple C++ TBAA"}
+;.
+; AVX512VL: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; AVX512VL: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0}
+; AVX512VL: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; AVX512VL: [[META3]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
index 2625840..253f084 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
@@ -1,13 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-- -mcpu=skylake-avx512 | FileCheck %s
define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], splat (i8 64)
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]]
-; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0]], align 1, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: ret void
;
%t3 = load i8, ptr %t1, align 1, !tbaa !3
@@ -70,3 +71,8 @@ define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1)
!3 = !{!4, !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
+;.
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
index 2cd7ada..b409aa7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -1,26 +1,27 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
define void @vsub2_test(ptr %pin1, ptr %pin2, ptr %pout) #0 {
-; CHECK-LABEL: @vsub2_test(
-; CHECK-NEXT: br label [[TMP1:%.*]]
-; CHECK: 1:
-; CHECK-NEXT: [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
-; CHECK-NEXT: [[PO_03:%.*]] = phi ptr [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
-; CHECK-NEXT: [[PTMPI2_02:%.*]] = phi ptr [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
-; CHECK-NEXT: [[PTMPI1_01:%.*]] = phi ptr [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
+; CHECK-LABEL: define void @vsub2_test(
+; CHECK-SAME: ptr [[PIN1:%.*]], ptr [[PIN2:%.*]], ptr [[POUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br label %[[TMP1:.*]]
+; CHECK: [[TMP1]]:
+; CHECK-NEXT: [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], %[[TMP1]] ]
+; CHECK-NEXT: [[PO_03:%.*]] = phi ptr [ [[POUT]], [[TMP0]] ], [ [[TMP7:%.*]], %[[TMP1]] ]
+; CHECK-NEXT: [[PTMPI2_02:%.*]] = phi ptr [ [[PIN2]], [[TMP0]] ], [ [[TMP4:%.*]], %[[TMP1]] ]
+; CHECK-NEXT: [[PTMPI1_01:%.*]] = phi ptr [ [[PIN1]], [[TMP0]] ], [ [[TMP2:%.*]], %[[TMP1]] ]
; CHECK-NEXT: [[TMP2]] = getelementptr inbounds i32, ptr [[PTMPI1_01]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTMPI1_01]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i32, ptr [[PTMPI2_02]], i64 1
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PTMPI2_02]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PTMPI2_02]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[TMP7]] = getelementptr inbounds i32, ptr [[PO_03]], i64 1
-; CHECK-NEXT: store i32 [[TMP6]], ptr [[PO_03]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT: store i32 [[TMP6]], ptr [[PO_03]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK: 9:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB9:.*]], label %[[TMP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: ret void
;
br label %1
@@ -61,3 +62,12 @@ define void @vsub2_test(ptr %pin1, ptr %pin2, ptr %pout) #0 {
!5 = distinct !{!5, !6, !7}
!6 = !{!"llvm.loop.vectorize.width", i32 1}
!7 = !{!"llvm.loop.interleave.count", i32 1}
+;.
+; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
+; CHECK: [[META6]] = !{!"llvm.loop.vectorize.width", i32 1}
+; CHECK: [[META7]] = !{!"llvm.loop.interleave.count", i32 1}
+;.
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index 545fa47..be91a87 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
@@ -9,11 +9,12 @@ declare void @llvm.memcpy.p0.p0.i64(ptr writeonly, ptr readonly, i64, i1 immarg)
declare double @subcall(double %g, i32 %m)
define double @bar(ptr %wishart) {
-; CHECK-LABEL: @bar(
+; CHECK-LABEL: define double @bar(
+; CHECK-SAME: ptr [[WISHART:%.*]]) {
; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART]], align 8, !tbaa [[DOUBLE_TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
-; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[INT_TBAA4:![0-9]+]]
; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT6:![0-9]+]]
; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
@@ -38,11 +39,11 @@ define double @bar(ptr %wishart) {
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
; CHECK: [[TBAA_STRUCT6]] = !{}
;.
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 5326b98..6a0cacc 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -p sroa -S %s | FileCheck %s
@@ -7,12 +7,12 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(24) %res, float %a, float %b) {
; CHECK-LABEL: define void @load_store_transfer_split_struct_tbaa_2_float(
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]]
; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8
; CHECK-NEXT: ret void
;
@@ -30,11 +30,11 @@ entry:
define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) {
; CHECK-LABEL: define void @memcpy_transfer(
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8
-; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
-; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: ret void
;
entry:
@@ -50,9 +50,9 @@ entry:
define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceable(24) %res, float %a, float %b) {
; CHECK-LABEL: define void @memcpy_transfer_tbaa_field_and_size_do_not_align(
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8
-; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32
; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
@@ -72,7 +72,7 @@ entry:
define void @load_store_transfer_split_struct_tbaa_2_i31(ptr dereferenceable(24) %res, i31 %a, i31 %b) {
; CHECK-LABEL: define void @load_store_transfer_split_struct_tbaa_2_i31(
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], i31 [[A:%.*]], i31 [[B:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP:%.*]] = alloca { i31, i31 }, align 4
; CHECK-NEXT: store i31 [[A]], ptr [[TMP]], align 4
; CHECK-NEXT: [[TMP_4_TMP_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 4
@@ -98,9 +98,9 @@ define void @store_vector_part_first(ptr %y2, float %f) {
; CHECK-LABEL: define void @store_vector_part_first(
; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[V2F32_TBAA5:![0-9]+]]
; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8
-; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: ret void
;
%x7 = alloca { float, float, float, float }
@@ -116,9 +116,9 @@ define void @store_vector_part_second(ptr %y2, float %f) {
; CHECK-LABEL: define void @store_vector_part_second(
; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4
-; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: ret void
;
%x7 = alloca { float, float, float, float }
@@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) {
; CHECK-LABEL: define void @store_vector_single(
; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: ret void
;
%x7 = alloca { float, float }
@@ -149,7 +149,7 @@ declare void @llvm.memset.p0.i8(ptr nocapture, i8, i32, i1) nounwind
define void @memset(ptr %dst, ptr align 8 %src) {
; CHECK-LABEL: define void @memset(
; CHECK-SAME: ptr [[DST:%.*]], ptr align 8 [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [7 x i8], align 1
; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[A_SROA_4:%.*]] = alloca [10 x i8], align 1
@@ -162,7 +162,7 @@ define void @memset(ptr %dst, ptr align 8 %src) {
; CHECK-NEXT: store i16 1, ptr [[A_SROA_3]], align 2
; CHECK-NEXT: [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false)
-; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true)
; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7
; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A_SROA_3]], align 2
@@ -187,7 +187,7 @@ entry:
define void @memset2(ptr %dst, ptr align 8 %src) {
; CHECK-LABEL: define void @memset2(
; CHECK-SAME: ptr [[DST:%.*]], ptr align 8 [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [209 x i8], align 1
; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[A_SROA_4:%.*]] = alloca [90 x i8], align 1
@@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false)
; CHECK-NEXT: store i8 1, ptr [[A_SROA_3]], align 1
; CHECK-NEXT: [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202
-; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[TBAA5]]
-; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[V2F32_TBAA5]]
+; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true)
; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209
; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1
@@ -233,7 +233,7 @@ entry:
define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define void @slice_store_v2i8_1(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [6 x i8], align 1
; CHECK-NEXT: [[A_SROA_2_SROA_0:%.*]] = alloca <2 x i8>, align 4
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC]], i32 6, i1 false)
@@ -268,7 +268,7 @@ entry:
define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define void @slice_store_v2i8_2(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0_SROA_1:%.*]] = alloca <2 x i8>, align 2
; CHECK-NEXT: [[A_SROA_0_SROA_4:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[A_SROA_4:%.*]] = alloca [5 x i8], align 1
@@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8
; CHECK-NEXT: [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8
; CHECK-NEXT: store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8
-; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8
; CHECK-NEXT: store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8
; CHECK-NEXT: [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8
@@ -335,9 +335,9 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) {
define i32 @shorten_integer_store_single_field(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define i32 @shorten_integer_store_single_field(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[FLOAT_TBAA0]]
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1
@@ -354,9 +354,9 @@ entry:
define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define i32 @shorten_integer_store_multiple_fields(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1
@@ -373,7 +373,7 @@ entry:
define <2 x i16> @shorten_vector_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define <2 x i16> @shorten_vector_store_multiple_fields(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8
@@ -391,7 +391,7 @@ entry:
define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr %src) {
; CHECK-LABEL: define <2 x i16> @shorten_vector_store_single_fields(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8
@@ -409,7 +409,7 @@ entry:
define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
; CHECK-LABEL: define i32 @split_load_with_tbaa_struct(
; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A3_SROA_0:%.*]] = alloca i16, align 8
; CHECK-NEXT: [[A3_SROA_3:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[A3_SROA_33:%.*]] = alloca float, align 4
@@ -429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
; CHECK-NEXT: [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
; CHECK-NEXT: [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1
; CHECK-NEXT: store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1
-; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0
-; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1
-; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2
; CHECK-NEXT: [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1
; CHECK-NEXT: [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32
@@ -468,7 +468,7 @@ entry:
define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
; CHECK-LABEL: define i32 @split_store_with_tbaa_struct(
; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
-; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A3_SROA_0:%.*]] = alloca i16, align 8
; CHECK-NEXT: [[A3_SROA_3:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[A3_SROA_33:%.*]] = alloca float, align 4
@@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
; CHECK-NEXT: [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1
; CHECK-NEXT: [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2
; CHECK-NEXT: [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0
-; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1
-; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2
-; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]]
+; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[V2F32_TBAA5]]
; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8
; CHECK-NEXT: store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1
; CHECK-NEXT: [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
@@ -548,11 +548,11 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
!15 = !{i64 0, i64 7, !6, i64 7, i64 1, !6}
!16 = !{i64 0, i64 2, !6, i64 4, i64 4, !6, i64 8, i64 1, !6}
;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[FLOAT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]}
-; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[FLOAT_TBAA0]], i64 4, i64 4, [[FLOAT_TBAA0]]}
+; CHECK: [[V2F32_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK: [[META6]] = !{!"v2f32", [[META2]], i64 0}
;.
diff --git a/llvm/test/Transforms/SROA/tbaa-subload.ll b/llvm/test/Transforms/SROA/tbaa-subload.ll
index b07874d..4c18006 100644
--- a/llvm/test/Transforms/SROA/tbaa-subload.ll
+++ b/llvm/test/Transforms/SROA/tbaa-subload.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
@@ -8,14 +8,14 @@
%class.anon = type <{ %class.ar, [7 x i8], { i64, i64 } }>
define void @caller() {
-; CHECK-LABEL: @caller(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define void @caller() {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[AGG:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[AGG]], i32 0, i32 2
; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 0
-; CHECK-NEXT: store i64 1, ptr [[DOTFCA_0_GEP]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i64 1, ptr [[DOTFCA_0_GEP]], align 8, !tbaa [[CHAR_TBAA0:![0-9]+]]
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 1
-; CHECK-NEXT: store i64 2, ptr [[DOTFCA_1_GEP]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i64 2, ptr [[DOTFCA_1_GEP]], align 8, !tbaa [[CHAR_TBAA0]]
; CHECK-NEXT: call void @use(ptr [[AGG]])
; CHECK-NEXT: ret void
;
@@ -36,11 +36,11 @@ declare void @use(ptr %this)
!8 = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", !9, i64 0, !3, i64 8}
!9 = !{!"_ZTS2ar"}
;.
-; CHECK: [[TBAA0]] = !{!1, !3, i64 8}
-; CHECK: [[META1:![0-9]+]] = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", !2, i64 0, !3, i64 8}
-; CHECK: [[META2:![0-9]+]] = !{!"_ZTS2ar"}
-; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
-; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
+; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META3:![0-9]+]], i64 8}
+; CHECK: [[META1]] = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", [[META2:![0-9]+]], i64 0, [[META3]], i64 8}
+; CHECK: [[META2]] = !{!"_ZTS2ar"}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
index 6cb94e8..af152d4 100644
--- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
+++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt %s -passes='function(scalarizer<load-store>,dce)' -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -6,21 +6,22 @@ declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer
define void @f1(<4 x float> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
+; CHECK-LABEL: define void @f1(
+; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
@@ -54,8 +55,8 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -92,21 +93,22 @@ exit:
}
define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
+; CHECK-LABEL: define void @f2(
+; CHECK-SAME: <4 x i32> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
@@ -139,8 +141,8 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -172,25 +174,26 @@ exit:
; Check that !tbaa information is preserved.
define void @f3(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f3(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f3(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa [[SET1_TBAA0:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
-; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
-; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
-; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
-; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[SET2_TBAA3:![0-9]+]]
+; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[SET2_TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[SET2_TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[SET2_TBAA3]]
; CHECK-NEXT: ret void
;
%val = load <4 x i32> , ptr %src, !tbaa !1
@@ -201,11 +204,12 @@ define void @f3(ptr %src, ptr %dst) {
; Check that !tbaa.struct information is preserved.
define void @f4(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f4(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f4(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
@@ -230,16 +234,17 @@ define void @f4(ptr %src, ptr %dst) {
; Check that llvm.access.group information is preserved.
define void @f5(i32 %count, ptr %src, ptr %dst) {
-; CHECK-LABEL: @f5(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
+; CHECK-LABEL: define void @f5(
+; CHECK-SAME: i32 [[COUNT:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT_INDEX:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
-; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
@@ -256,9 +261,9 @@ define void @f5(i32 %count, ptr %src, ptr %dst) {
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1
-; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
-; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: end:
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT]]
+; CHECK-NEXT: br i1 [[CONTINUE]], label %[[LOOP]], label %[[END:.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -281,15 +286,16 @@ end:
; Check that fpmath information is preserved.
define <4 x float> @f6(<4 x float> %x) {
-; CHECK-LABEL: @f6(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
+; CHECK-LABEL: define <4 x float> @f6(
+; CHECK-SAME: <4 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X]], i64 0
+; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
-; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
+; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2
-; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
+; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3
-; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
+; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2
@@ -303,11 +309,12 @@ define <4 x float> @f6(<4 x float> %x) {
; Check that random metadata isn't kept.
define void @f7(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f7(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f7(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
@@ -332,17 +339,18 @@ define void @f7(ptr %src, ptr %dst) {
; Test GEP with vectors.
define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
-; CHECK-LABEL: @f8(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f8(
+; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR0:%.*]], <4 x i32> [[I0:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
+; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 0
; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
-; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
+; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0]], i64 1
; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
-; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
+; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER]], i32 [[I0_I1]]
; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32
@@ -362,11 +370,12 @@ define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
; Test the handling of unaligned loads.
define void @f9(ptr %dest, ptr %src) {
-; CHECK-LABEL: @f9(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f9(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
@@ -386,11 +395,12 @@ define void @f9(ptr %dest, ptr %src) {
; ...and again with subelement alignment.
define void @f10(ptr %dest, ptr %src) {
-; CHECK-LABEL: @f10(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f10(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
+; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 1
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
@@ -410,8 +420,9 @@ define void @f10(ptr %dest, ptr %src) {
; Test that sub-byte loads aren't scalarized.
define void @f11(ptr %dest, ptr %src0) {
-; CHECK-LABEL: @f11(
-; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1
+; CHECK-LABEL: define void @f11(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC0:%.*]]) {
+; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0]], i32 1
; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4
; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0
; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1
@@ -542,7 +553,7 @@ define void @f11(ptr %dest, ptr %src0) {
; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29
; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30
; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31
-; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST]], align 4
; CHECK-NEXT: ret void
;
%src1 = getelementptr <32 x i1>, ptr %src0, i32 1
@@ -555,12 +566,13 @@ define void @f11(ptr %dest, ptr %src0) {
; Test vector GEPs with more than one index.
define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
-; CHECK-LABEL: @f13(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f13(
+; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR:%.*]], <4 x i32> [[I:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0
+; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR]], i64 0
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I]], i64 0
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]]
; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1
@@ -587,19 +599,20 @@ define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
; Test combinations of vector and non-vector PHIs.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
-; CHECK-LABEL: @f14(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0
+; CHECK-LABEL: define <4 x float> @f14(
+; CHECK-SAME: <4 x float> [[ACC:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC]], i64 0
; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1
; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2
; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], %[[ENTRY]] ], [ [[NEXT_ACC_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], %[[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], %[[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], %[[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXT_COUNT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0
; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1
; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2
@@ -619,8 +632,8 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3
; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]]
;
entry:
@@ -641,13 +654,14 @@ exit:
; Test unary operator scalarization.
define void @f15(<4 x float> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f15(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-LABEL: define void @f15(
+; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
@@ -681,8 +695,8 @@ define void @f15(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -712,9 +726,10 @@ exit:
; Check that IR flags are preserved.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
-; CHECK-LABEL: @f16(
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
-; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f16(
+; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) {
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0
+; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
@@ -727,9 +742,10 @@ define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
ret <2 x i32> %res
}
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
-; CHECK-LABEL: @f17(
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
-; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f17(
+; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) {
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0
+; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
@@ -742,9 +758,10 @@ define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
ret <2 x i32> %res
}
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
-; CHECK-LABEL: @f18(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f18(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -757,8 +774,9 @@ define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
ret <2 x float> %res
}
define <2 x float> @f19(<2 x float> %x) {
-; CHECK-LABEL: @f19(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f19(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]]
@@ -770,9 +788,10 @@ define <2 x float> @f19(<2 x float> %x) {
ret <2 x float> %res
}
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
-; CHECK-LABEL: @f20(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
+; CHECK-LABEL: define <2 x i1> @f20(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -786,8 +805,9 @@ define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
}
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
define <2 x float> @f21(<2 x float> %x) {
-; CHECK-LABEL: @f21(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f21(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
@@ -800,10 +820,11 @@ define <2 x float> @f21(<2 x float> %x) {
}
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: @f22(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
-; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f22(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
+; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -819,10 +840,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; See https://reviews.llvm.org/D83101#2133062
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
-; CHECK-LABEL: @f23_crash(
-; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f23_crash(
+; CHECK-SAME: <2 x i32> [[SRCVEC:%.*]], i32 [[V1:%.*]]) {
+; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC]], i64 0
; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0
-; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1
+; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1]], i64 1
; CHECK-NEXT: ret <2 x i32> [[T1]]
;
%v0 = extractelement <2 x i32> %srcvec, i32 0
@@ -838,3 +860,15 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}
+;.
+; CHECK: [[SET1_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"set1", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"root"}
+; CHECK: [[SET2_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"set2", [[META2]]}
+; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 8, null}
+; CHECK: [[ACC_GRP6]] = distinct !{}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP6]]}
+; CHECK: [[META9]] = !{float 4.000000e+00}
+;.
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 190e8a0..82337c9 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt %s -passes='function(scalarizer<load-store>,dce)' -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -6,21 +6,22 @@ declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer
define void @f1(<4 x float> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
+; CHECK-LABEL: define void @f1(
+; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
@@ -54,8 +55,8 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -92,21 +93,22 @@ exit:
}
define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
+; CHECK-LABEL: define void @f2(
+; CHECK-SAME: <4 x i32> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
@@ -139,8 +141,8 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -172,25 +174,26 @@ exit:
; Check that !tbaa information is preserved.
define void @f3(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f3(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f3(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa [[SET1_TBAA0:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
-; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
-; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
-; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[SET1_TBAA0]]
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
-; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[SET2_TBAA3:![0-9]+]]
+; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[SET2_TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[SET2_TBAA3]]
+; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[SET2_TBAA3]]
; CHECK-NEXT: ret void
;
%val = load <4 x i32> , ptr %src, !tbaa !1
@@ -201,11 +204,12 @@ define void @f3(ptr %src, ptr %dst) {
; Check that !tbaa.struct information is preserved.
define void @f4(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f4(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f4(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
@@ -230,16 +234,17 @@ define void @f4(ptr %src, ptr %dst) {
; Check that llvm.access.group information is preserved.
define void @f5(i32 %count, ptr %src, ptr %dst) {
-; CHECK-LABEL: @f5(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
+; CHECK-LABEL: define void @f5(
+; CHECK-SAME: i32 [[COUNT:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT_INDEX:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
-; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
@@ -256,9 +261,9 @@ define void @f5(i32 %count, ptr %src, ptr %dst) {
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1
-; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
-; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK: end:
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT]]
+; CHECK-NEXT: br i1 [[CONTINUE]], label %[[LOOP]], label %[[END:.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
entry:
@@ -281,8 +286,9 @@ end:
; Check that fpmath information is preserved.
define <4 x float> @f6(<4 x float> %x) {
-; CHECK-LABEL: @f6(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
+; CHECK-LABEL: define <4 x float> @f6(
+; CHECK-SAME: <4 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]]
@@ -303,11 +309,12 @@ define <4 x float> @f6(<4 x float> %x) {
; Check that random metadata isn't kept.
define void @f7(ptr %src, ptr %dst) {
-; CHECK-LABEL: @f7(
-; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; CHECK-LABEL: define void @f7(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
@@ -332,17 +339,18 @@ define void @f7(ptr %src, ptr %dst) {
; Test GEP with vectors.
define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
-; CHECK-LABEL: @f8(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f8(
+; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR0:%.*]], <4 x i32> [[I0:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
+; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 0
; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
-; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
+; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0]], i64 1
; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
-; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
+; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER]], i32 [[I0_I1]]
; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32
@@ -362,11 +370,12 @@ define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
; Test the handling of unaligned loads.
define void @f9(ptr %dest, ptr %src) {
-; CHECK-LABEL: @f9(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f9(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
@@ -386,11 +395,12 @@ define void @f9(ptr %dest, ptr %src) {
; ...and again with subelement alignment.
define void @f10(ptr %dest, ptr %src) {
-; CHECK-LABEL: @f10(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f10(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
+; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 1
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
@@ -410,8 +420,9 @@ define void @f10(ptr %dest, ptr %src) {
; Test that sub-byte loads aren't scalarized.
define void @f11(ptr %dest, ptr %src0) {
-; CHECK-LABEL: @f11(
-; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1
+; CHECK-LABEL: define void @f11(
+; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC0:%.*]]) {
+; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0]], i32 1
; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4
; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0
; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1
@@ -542,7 +553,7 @@ define void @f11(ptr %dest, ptr %src0) {
; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29
; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30
; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31
-; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST]], align 4
; CHECK-NEXT: ret void
;
%src1 = getelementptr <32 x i1>, ptr %src0, i32 1
@@ -555,12 +566,13 @@ define void @f11(ptr %dest, ptr %src0) {
; Test vector GEPs with more than one index.
define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
-; CHECK-LABEL: @f13(
-; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
+; CHECK-LABEL: define void @f13(
+; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR:%.*]], <4 x i32> [[I:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
-; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0
+; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR]], i64 0
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I]], i64 0
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]]
; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1
@@ -587,19 +599,20 @@ define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
; Test combinations of vector and non-vector PHIs.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
-; CHECK-LABEL: @f14(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0
+; CHECK-LABEL: define <4 x float> @f14(
+; CHECK-SAME: <4 x float> [[ACC:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC]], i64 0
; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1
; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2
; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], %[[ENTRY]] ], [ [[NEXT_ACC_I0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], %[[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], %[[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], %[[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXT_COUNT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0
; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1
; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2
@@ -619,8 +632,8 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3
; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]]
;
entry:
@@ -641,13 +654,14 @@ exit:
; Test unary operator scalarization.
define void @f15(<4 x float> %init, ptr %base, i32 %count) {
-; CHECK-LABEL: @f15(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-LABEL: define void @f15(
+; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
@@ -681,8 +695,8 @@ define void @f15(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
-; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
@@ -712,9 +726,10 @@ exit:
; Check that IR flags are preserved.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
-; CHECK-LABEL: @f16(
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
-; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f16(
+; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) {
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0
+; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
@@ -727,9 +742,10 @@ define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
ret <2 x i32> %res
}
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
-; CHECK-LABEL: @f17(
-; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
-; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f17(
+; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) {
+; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0
+; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
@@ -742,9 +758,10 @@ define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
ret <2 x i32> %res
}
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
-; CHECK-LABEL: @f18(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f18(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -757,8 +774,9 @@ define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
ret <2 x float> %res
}
define <2 x float> @f19(<2 x float> %x) {
-; CHECK-LABEL: @f19(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f19(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]]
@@ -770,9 +788,10 @@ define <2 x float> @f19(<2 x float> %x) {
ret <2 x float> %res
}
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
-; CHECK-LABEL: @f20(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
+; CHECK-LABEL: define <2 x i1> @f20(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -786,8 +805,9 @@ define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
}
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
define <2 x float> @f21(<2 x float> %x) {
-; CHECK-LABEL: @f21(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f21(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
@@ -800,10 +820,11 @@ define <2 x float> @f21(<2 x float> %x) {
}
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: @f22(
-; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
-; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
-; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f22(
+; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) {
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0
+; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0
+; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
@@ -819,10 +840,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; See https://reviews.llvm.org/D83101#2133062
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
-; CHECK-LABEL: @f23_crash(
-; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f23_crash(
+; CHECK-SAME: <2 x i32> [[SRCVEC:%.*]], i32 [[V1:%.*]]) {
+; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC]], i64 0
; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0
-; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1
+; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1]], i64 1
; CHECK-NEXT: ret <2 x i32> [[T1]]
;
%v0 = extractelement <2 x i32> %srcvec, i32 0
@@ -832,8 +854,9 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
}
define <2 x i32> @f24(<2 x i32> %src) {
-; CHECK-LABEL: @f24(
-; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
+; CHECK-LABEL: define <2 x i32> @f24(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) {
+; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC]], i64 0
; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]]
; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]]
@@ -846,8 +869,9 @@ define <2 x i32> @f24(<2 x i32> %src) {
}
define <2 x float> @f25(<2 x float> %src) {
-; CHECK-LABEL: @f25(
-; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0
+; CHECK-LABEL: define <2 x float> @f25(
+; CHECK-SAME: <2 x float> [[SRC:%.*]]) {
+; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC]], i64 0
; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]]
; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1
; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]]
@@ -866,8 +890,9 @@ define <2 x float> @f25(<2 x float> %src) {
}
define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) {
-; CHECK-LABEL: @test_copy_trunc_flags(
-; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
+; CHECK-LABEL: define <2 x i8> @test_copy_trunc_flags(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) {
+; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC]], i64 0
; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc nuw nsw i32 [[SRC_I0]] to i8
; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc nuw nsw i32 [[SRC_I1]] to i8
@@ -886,3 +911,15 @@ define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) {
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}
+;.
+; CHECK: [[SET1_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"set1", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"root"}
+; CHECK: [[SET2_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"set2", [[META2]]}
+; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 8, null}
+; CHECK: [[ACC_GRP6]] = distinct !{}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP6]]}
+; CHECK: [[META9]] = !{float 4.000000e+00}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
index 2c5889a..08397b5 100644
--- a/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
@@ -1,20 +1,21 @@
-; NOTE: Assertions have been autogenerated by update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; Test merging of blocks that only have PHI nodes in them
;
; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
;
define i32 @test(i1 %a, i1 %b) {
-; CHECK-LABEL: @test(
-; CHECK: M:
-; CHECK-NEXT: [[DOT:%.*]] = select i1 %b, i32 0, i32 1
-; CHECK-NEXT: [[W:%.*]] = select i1 %a, i32 2, i32 [[DOT]]
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT: [[M:.*:]]
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[B]], i32 0, i32 1, !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: [[W:%.*]] = select i1 [[A]], i32 2, i32 [[SPEC_SELECT]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[R:%.*]] = add i32 [[W]], 1
; CHECK-NEXT: ret i32 [[R]]
;
- br i1 %a, label %M, label %O
+ br i1 %a, label %M, label %O, !prof !0
O: ; preds = %0
- br i1 %b, label %N, label %Q
+ br i1 %b, label %N, label %Q, !prof !1
Q: ; preds = %O
br label %N
N: ; preds = %Q, %O
@@ -27,3 +28,9 @@ M: ; preds = %N, %0
ret i32 %R
}
+!0 = !{!"branch_weights", i32 11, i32 7}
+!1 = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 3, i32 5}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 11, i32 7}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
index 4384847..71ad069 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=NORMAL,BASELINE
; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefixes=NORMAL,AGGRESSIVE
; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefixes=WAYAGGRESSIVE
@@ -11,12 +11,12 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) {
; BASELINE-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], ptr [[INPUT:%.*]]) {
; BASELINE-NEXT: [[ENTRY:.*]]:
; BASELINE-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 3
-; BASELINE-NEXT: br i1 [[CMP]], label %[[COND_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+; BASELINE-NEXT: br i1 [[CMP]], label %[[COND_END:.*]], label %[[LOR_LHS_FALSE:.*]], !prof [[PROF0:![0-9]+]]
; BASELINE: [[LOR_LHS_FALSE]]:
; BASELINE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1
; BASELINE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]]
; BASELINE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]]
-; BASELINE-NEXT: br i1 [[CMP1]], label %[[COND_FALSE:.*]], label %[[COND_END]]
+; BASELINE-NEXT: br i1 [[CMP1]], label %[[COND_FALSE:.*]], label %[[COND_END]], !prof [[PROF1:![0-9]+]]
; BASELINE: [[COND_FALSE]]:
; BASELINE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4
; BASELINE-NEXT: br label %[[COND_END]]
@@ -31,8 +31,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) {
; AGGRESSIVE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1
; AGGRESSIVE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]]
; AGGRESSIVE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]]
-; AGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false
-; AGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]]
+; AGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false, !prof [[PROF0:![0-9]+]]
+; AGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]], !prof [[PROF0]]
; AGGRESSIVE: [[COND_FALSE]]:
; AGGRESSIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4
; AGGRESSIVE-NEXT: br label %[[COND_END]]
@@ -47,8 +47,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) {
; WAYAGGRESSIVE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1
; WAYAGGRESSIVE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]]
; WAYAGGRESSIVE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]]
-; WAYAGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false
-; WAYAGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]]
+; WAYAGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false, !prof [[PROF0:![0-9]+]]
+; WAYAGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]], !prof [[PROF0]]
; WAYAGGRESSIVE: [[COND_FALSE]]:
; WAYAGGRESSIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4
; WAYAGGRESSIVE-NEXT: br label %[[COND_END]]
@@ -58,13 +58,13 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) {
;
entry:
%cmp = icmp sgt i32 %d, 3
- br i1 %cmp, label %cond.end, label %lor.lhs.false
+ br i1 %cmp, label %cond.end, label %lor.lhs.false, !prof !0
lor.lhs.false:
%mul = shl i32 %c, 1
%add = add nsw i32 %mul, %a
%cmp1 = icmp slt i32 %add, %b
- br i1 %cmp1, label %cond.false, label %cond.end
+ br i1 %cmp1, label %cond.false, label %cond.end, !prof !1
cond.false:
%0 = load i32, ptr %input, align 4
@@ -160,3 +160,14 @@ cond.end:
%cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ],[ 0, %pred_a ],[ 0, %pred_b ]
ret i32 %cond
}
+
+!0 = !{!"branch_weights", i32 7, i32 11}
+!1 = !{!"branch_weights", i32 13, i32 5}
+;.
+; BASELINE: [[PROF0]] = !{!"branch_weights", i32 7, i32 11}
+; BASELINE: [[PROF1]] = !{!"branch_weights", i32 13, i32 5}
+;.
+; AGGRESSIVE: [[PROF0]] = !{!"branch_weights", i32 143, i32 181}
+;.
+; WAYAGGRESSIVE: [[PROF0]] = !{!"branch_weights", i32 143, i32 181}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index 2f5fb4f..8e7b91e 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -1,12 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
define void @test(ptr %P, ptr %Q, i1 %A, i1 %B) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_NOT:%.*]] = xor i1 [[A:%.*]], true
-; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A_NOT]], i1 true, i1 [[B:%.*]]
-; CHECK-NEXT: br i1 [[BRMERGE]], label [[B:%.*]], label [[COMMON_RET:%.*]]
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A_NOT]], i1 true, i1 [[B:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label [[B:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: b:
@@ -15,9 +15,9 @@ define void @test(ptr %P, ptr %Q, i1 %A, i1 %B) {
;
entry:
- br i1 %A, label %a, label %b
+ br i1 %A, label %a, label %b, !prof !0
a:
- br i1 %B, label %b, label %c
+ br i1 %B, label %b, label %c, !prof !1
b:
store i32 123, ptr %P
ret void
@@ -146,3 +146,12 @@ Succ:
}
declare void @dummy()
+
+!0 = !{!"branch_weights", i32 3, i32 7}
+!1 = !{!"branch_weights", i32 11, i32 4}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp memory(read) uwtable }
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 7, i32 3}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 138, i32 12}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index ba54245..0624f72 100644
--- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -11,8 +11,8 @@ define void @test1(i1 %a, i1 %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_NOT:%.*]] = xor i1 [[A:%.*]], true
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A_NOT]], i1 [[C]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A_NOT]], i1 [[C]], i1 false, !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF0]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: Y:
@@ -42,8 +42,8 @@ define void @test2(i1 %a, i1 %b) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: Y:
@@ -73,8 +73,8 @@ define void @test3(i1 %a, i1 %b) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: Y:
@@ -104,7 +104,7 @@ define void @test4(i1 %a, i1 %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF2]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -237,8 +237,8 @@ define void @test1_swap(i1 %a, i1 %b) {
; CHECK-LABEL: @test1_swap(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]], !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF5]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: Y:
@@ -268,8 +268,8 @@ define void @test7(i1 %a, i1 %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false
-; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]]
-; CHECK-NEXT: br i1 [[BRMERGE]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF6:![0-9]+]]
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]], !prof [[PROF6:![0-9]+]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF7:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: Y:
@@ -300,7 +300,7 @@ define void @test8(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LT:%.*]] = icmp slt i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: br i1 [[LT]], label [[A:%.*]], label [[B:%.*]], !prof [[PROF7:![0-9]+]]
+; CHECK-NEXT: br i1 [[LT]], label [[A:%.*]], label [[B:%.*]], !prof [[PROF8:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: a:
@@ -339,7 +339,7 @@ define i1 @test9(i32 %x, i32 %y) nounwind {
; CHECK-NEXT: i32 1, label [[END:%.*]]
; CHECK-NEXT: i32 2, label [[END]]
; CHECK-NEXT: i32 92, label [[END]]
-; CHECK-NEXT: ], !prof [[PROF8:![0-9]+]]
+; CHECK-NEXT: ], !prof [[PROF9:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i1 [ [[RETA:%.*]], [[A]] ], [ [[RET:%.*]], [[END]] ]
; CHECK-NEXT: ret i1 [[COMMON_RET_OP]]
@@ -381,7 +381,7 @@ define void @test10(i32 %x) nounwind readnone ssp noredzone {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1
; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT: br i1 [[SWITCH]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]], !prof [[PROF9:![0-9]+]]
+; CHECK-NEXT: br i1 [[SWITCH]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]], !prof [[PROF10:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: lor.rhs:
@@ -413,7 +413,7 @@ define void @test11(i32 %x) nounwind {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[I:%.*]] = shl i32 [[X:%.*]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I]], 24
-; CHECK-NEXT: br i1 [[COND]], label [[C:%.*]], label [[A:%.*]], !prof [[PROF10:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[C:%.*]], label [[A:%.*]], !prof [[PROF11:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: a:
@@ -500,8 +500,8 @@ define void @test14(ptr %old, i32 %final) {
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[BIT_0]], 0
; CHECK-NEXT: [[V3:%.*]] = load i32, ptr @max_regno, align 4
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[I_1]], [[V3]]
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[CMP4]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_EXIT:%.*]], label [[FOR_INC]], !prof [[PROF11:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[CMP4]], !prof [[PROF12:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_EXIT:%.*]], label [[FOR_INC]], !prof [[PROF12]]
; CHECK: for.inc:
; CHECK-NEXT: [[SHL]] = shl i32 [[BIT_0]], 1
; CHECK-NEXT: [[INC19]] = add nsw i32 [[I_1]], 1
@@ -534,7 +534,7 @@ define i32 @HoistThenElseCodeToIf(i32 %n) {
; CHECK-LABEL: @HoistThenElseCodeToIf(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[N:%.*]], 0
-; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof [[PROF12:![0-9]+]]
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof [[PROF6]]
; CHECK-NEXT: ret i32 [[DOT]]
;
entry:
@@ -557,8 +557,8 @@ return:
define i32 @SimplifyCondBranchToCondBranch(i1 %cmpa, i1 %cmpb) {
; CHECK-LABEL: @SimplifyCondBranchToCondBranch(
; CHECK-NEXT: block1:
-; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA:%.*]], i1 true, i1 [[CMPB:%.*]]
-; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA]], i32 0, i32 2, !prof [[PROF13:![0-9]+]]
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA:%.*]], i1 true, i1 [[CMPB:%.*]], !prof [[PROF13:![0-9]+]]
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA]], i32 0, i32 2, !prof [[PROF13]]
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF14:![0-9]+]]
; CHECK-NEXT: ret i32 [[OUTVAL]]
;
@@ -584,8 +584,8 @@ define i32 @SimplifyCondBranchToCondBranchSwap(i1 %cmpa, i1 %cmpb) {
; CHECK-NEXT: block1:
; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 [[CMPA:%.*]], true
; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 [[CMPB:%.*]], true
-; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]]
-; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15:![0-9]+]]
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]], !prof [[PROF15:![0-9]+]]
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15]]
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF16:![0-9]+]]
; CHECK-NEXT: ret i32 [[OUTVAL]]
;
@@ -609,7 +609,7 @@ define i32 @SimplifyCondBranchToCondBranchSwapMissingWeight(i1 %cmpa, i1 %cmpb)
; CHECK-NEXT: block1:
; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 [[CMPA:%.*]], true
; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 [[CMPB:%.*]], true
-; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]]
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]], !prof [[PROF15]]
; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15]]
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF17:![0-9]+]]
; CHECK-NEXT: ret i32 [[OUTVAL]]
@@ -701,8 +701,8 @@ define void @or_icmps_probably_not_harmful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF20:![0-9]+]], !unpredictable [[META21:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF20:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF20]], !unpredictable [[META21:![0-9]+]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -733,8 +733,8 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF22:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF22:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF22]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -765,8 +765,8 @@ define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF23:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF23:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF23]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -796,8 +796,8 @@ define void @or_icmps_useful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF24:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -827,7 +827,7 @@ define void @or_icmps_useful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF24]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
@@ -956,8 +956,8 @@ define void @and_icmps_not_that_harmful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -988,7 +988,7 @@ define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
@@ -1019,8 +1019,8 @@ define void @and_icmps_useful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -1050,7 +1050,7 @@ define void @and_icmps_useful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
@@ -1097,23 +1097,26 @@ exit:
!20 = !{}
; .
+; .
+; .
+;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable }
; CHECK: attributes #[[ATTR1]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind ssp memory(none) }
-; .
+;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 11}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 5}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 3}
; CHECK: [[PROF3]] = !{!"branch_weights", i32 7, i32 1, i32 2}
; CHECK: [[PROF4]] = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35}
; CHECK: [[PROF5]] = !{!"branch_weights", i32 11, i32 5}
-; CHECK: [[PROF6]] = !{!"branch_weights", i32 17, i32 15}
-; CHECK: [[PROF7]] = !{!"branch_weights", i32 9, i32 7}
-; CHECK: [[PROF8]] = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
-; CHECK: [[PROF9]] = !{!"branch_weights", i32 24, i32 33}
-; CHECK: [[PROF10]] = !{!"branch_weights", i32 8, i32 33}
-; CHECK: [[PROF11]] = !{!"branch_weights", i32 112017436, i32 -735157296}
-; CHECK: [[PROF12]] = !{!"branch_weights", i32 3, i32 5}
+; CHECK: [[PROF6]] = !{!"branch_weights", i32 3, i32 5}
+; CHECK: [[PROF7]] = !{!"branch_weights", i32 17, i32 15}
+; CHECK: [[PROF8]] = !{!"branch_weights", i32 9, i32 7}
+; CHECK: [[PROF9]] = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
+; CHECK: [[PROF10]] = !{!"branch_weights", i32 24, i32 33}
+; CHECK: [[PROF11]] = !{!"branch_weights", i32 8, i32 33}
+; CHECK: [[PROF12]] = !{!"branch_weights", i32 112017436, i32 -735157296}
; CHECK: [[PROF13]] = !{!"branch_weights", i32 2, i32 3}
; CHECK: [[PROF14]] = !{!"branch_weights", i32 34, i32 21}
; CHECK: [[PROF15]] = !{!"branch_weights", i32 3, i32 2}
@@ -1128,4 +1131,4 @@ exit:
; CHECK: [[PROF24]] = !{!"branch_weights", i32 101, i32 99}
; CHECK: [[PROF25]] = !{!"branch_weights", i32 1, i32 197}
; CHECK: [[PROF26]] = !{!"branch_weights", i32 99, i32 101}
-; .
+;.
diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll
index 3c0bc8a..b1e456f 100644
--- a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll
+++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll
@@ -26,36 +26,43 @@ exit:
; No value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count"}' >> %t
; RUN: not %{RUN} TOO-FEW
; i16 value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i16 5}' >> %t
; RUN: %{RUN} GOOD
; i32 value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5}' >> %t
; RUN: %{RUN} GOOD
; i64 value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i64 5}' >> %t
; RUN: not %{RUN} BAD-VALUE
; MDString value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !"5"}' >> %t
; RUN: not %{RUN} BAD-VALUE
; MDNode value.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !2}' >> %t
; RUN: echo '!2 = !{i32 5}' >> %t
; RUN: not %{RUN} BAD-VALUE
; Too many values.
; RUN: cp %s %t
+; RUN: chmod u+w %t
; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5}' >> %t
; RUN: not %{RUN} TOO-MANY
diff --git a/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected b/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected
new file mode 100644
index 0000000..f7e7499
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected
@@ -0,0 +1,44 @@
+Function: main: 368038, 0, 7 sampled lines
+Samples collected in the function's body {
+ 4: 1068
+ 4.2: 1068
+ 5: 2150
+ 5.1: 2150
+ 6: 4160
+ 7: 1068
+ 9: 4128, calls: _Z3bari:2942 _Z3fooi:1262
+ 9: vtables: _ZTVbar:2942 _ZTVfoo:1260
+}
+Samples collected in inlined callsites {
+ 10: inlined callee: inline1: 2000, 0, 1 sampled lines
+ Samples collected in the function's body {
+ 1: 2000
+ }
+ No inlined callsites in this function
+ 10: inlined callee: inline2: 4000, 0, 1 sampled lines
+ Samples collected in the function's body {
+ 1: 4000
+ }
+ No inlined callsites in this function
+ 10: vtables: _ZTVinline1:2000 _ZTVinline2:4000
+}
+Function: _Z3bari: 40602, 2874, 1 sampled lines
+Samples collected in the function's body {
+ 1: 2874
+}
+No inlined callsites in this function
+Function: _Z3fooi: 15422, 1220, 1 sampled lines
+Samples collected in the function's body {
+ 1: 1220
+}
+No inlined callsites in this function
+======== Dump profile symbol list ========
+_Z3goov
+_Z3sumii
+__libc_csu_fini
+__libc_csu_init
+_dl_relocate_static_pie
+_fini
+_init
+_start
+main
diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext
new file mode 100644
index 0000000..100133f
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext
@@ -0,0 +1,18 @@
+main:184019:0
+ 4: 534
+ 4.2: 534
+ 5: 1075
+ 5.1: 1075
+ 6: 2080
+ 7: 534
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
+ 9: vtables _ZTVbar:1471 _ZTVfoo:630
+ 10: inline1:1000
+ 1: 1000
+ 10: inline2:2000
+ 1: 2000
+ 10: vtables _ZTVinline1:1000 _ZTVinline2:2000
+_Z3bari:20301:1437
+ 1: 1437
+_Z3fooi:7711:610
+ 1: 610
diff --git a/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test
index b445695..8383bcc 100644
--- a/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test
+++ b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test
@@ -4,3 +4,12 @@ REQUIRES: zlib
; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections %t.1.output %t.2.output -o %t.3.output
; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output
; RUN: diff -b %S/Inputs/profile-symbol-list.expected %t.4.output
+
+;; Generate two SampleFDO binary profiles and merge them.
+;; Tests that the vtable counters in the merged profile are the aggregated
+;; result from both sources.
+; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile-ext.proftext -o %t.1.output
+; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile-ext.proftext -o %t.2.output
+; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof %t.1.output %t.2.output -o %t.3.output
+; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output
+; RUN: diff -b %S/Inputs/profile-symbol-list-ext.expected %t.4.output
diff --git a/llvm/test/tools/llvm-profdata/profile-symbol-list.test b/llvm/test/tools/llvm-profdata/profile-symbol-list.test
index 39dcd11..6845531 100644
--- a/llvm/test/tools/llvm-profdata/profile-symbol-list.test
+++ b/llvm/test/tools/llvm-profdata/profile-symbol-list.test
@@ -7,3 +7,12 @@
; RUN: llvm-profdata show -sample -show-sec-info-only %t.5.output | FileCheck %s -check-prefix=NOSYMLIST
; NOSYMLIST: ProfileSymbolListSection {{.*}} Size: 0
+
+;; Generate two SampleFDO binary profiles and merge them.
+;; Tests that the vtable counters in the merged profile are the aggregated
+;; result from both sources.
+; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile-ext.proftext -o %t.1.output
+; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile-ext.proftext -o %t.2.output
+; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof %t.1.output %t.2.output -o %t.3.output
+; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output
+; RUN: diff -b %S/Inputs/profile-symbol-list-ext.expected %t.4.output
diff --git a/llvm/test/tools/llvm-profdata/roundtrip.test b/llvm/test/tools/llvm-profdata/roundtrip.test
index 7af76e0..eb55534 100644
--- a/llvm/test/tools/llvm-profdata/roundtrip.test
+++ b/llvm/test/tools/llvm-profdata/roundtrip.test
@@ -16,3 +16,9 @@ RUN: llvm-profdata merge --sample --binary -output=%t.4.profdata %S/Inputs/sampl
RUN: llvm-profdata merge --sample --extbinary -output=%t.5.profdata %t.4.profdata
RUN: llvm-profdata merge --sample --text -output=%t.4.proftext %t.5.profdata
RUN: diff -b %t.4.proftext %S/Inputs/sample-profile.proftext
+# Round trip from text --> extbinary --> text.
+# The vtable profile is supported by ext-binary profile but not raw binary profile format,
+# so we don't use raw binary profile format in this roundtrip.
+RUN: llvm-profdata merge --sample --extbinary -extbinary-write-vtable-type-prof --output=%t.5.profdata %S/Inputs/sample-profile-ext.proftext
+RUN: llvm-profdata merge --sample --text --output=%t.5.proftext %t.5.profdata
+RUN: diff -b %t.5.proftext %S/Inputs/sample-profile-ext.proftext
diff --git a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp
index a264ca7..b0ad208 100644
--- a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp
+++ b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp
@@ -172,8 +172,8 @@ TEST(RootSignature, HeaderData) {
NumStaticSamplers: 0
StaticSamplersOffset: 48
Parameters:
- - ParameterType: 1
- ShaderVisibility: 2
+ - ParameterType: Constants32Bit
+ ShaderVisibility: Hull
Constants:
Num32BitValues: 16
ShaderRegister: 15
@@ -224,8 +224,8 @@ TEST(RootSignature, ParseRootConstants) {
NumStaticSamplers: 0
StaticSamplersOffset: 48
Parameters:
- - ParameterType: 1
- ShaderVisibility: 2
+ - ParameterType: Constants32Bit
+ ShaderVisibility: Hull
Constants:
Num32BitValues: 16
ShaderRegister: 15
@@ -276,8 +276,8 @@ TEST(RootSignature, ParseRootDescriptorsV10) {
NumStaticSamplers: 0
StaticSamplersOffset: 44
Parameters:
- - ParameterType: 2 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: CBV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
@@ -327,8 +327,8 @@ TEST(RootSignature, ParseRootDescriptorsV11) {
NumStaticSamplers: 0
StaticSamplersOffset: 48
Parameters:
- - ParameterType: 2 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: CBV
+ ShaderVisibility: Domain
Descriptor:
ShaderRegister: 31
RegisterSpace: 32
@@ -379,12 +379,12 @@ TEST(RootSignature, ParseDescriptorTableV10) {
NumStaticSamplers: 0
StaticSamplersOffset: 64
Parameters:
- - ParameterType: 0 # SRV
- ShaderVisibility: 3 # Domain
+ - ParameterType: DescriptorTable
+ ShaderVisibility: Domain
Table:
NumRanges: 1
Ranges:
- - RangeType: 0
+ - RangeType: SRV
NumDescriptors: 41
BaseShaderRegister: 42
RegisterSpace: 43
@@ -435,12 +435,12 @@ TEST(RootSignature, ParseDescriptorTableV11) {
NumStaticSamplers: 0
StaticSamplersOffset: 68
Parameters:
- - ParameterType: 0 # Descriptor Table
- ShaderVisibility: 3 # Domain
+ - ParameterType: DescriptorTable
+ ShaderVisibility: Domain
Table:
NumRanges: 1
Ranges:
- - RangeType: 0
+ - RangeType: SRV
NumDescriptors: -1
BaseShaderRegister: 42
RegisterSpace: 43
@@ -492,19 +492,19 @@ Parts:
StaticSamplersOffset: 24
Parameters: []
Samplers:
- - Filter: 16
- AddressU: 1
- AddressV: 2
- AddressW: 5
+ - Filter: MinLinearMagMipPoint
+ AddressU: Wrap
+ AddressV: Mirror
+ AddressW: MirrorOnce
MipLODBias: 1.23
MaxAnisotropy: 20
- ComparisonFunc: 4
- BorderColor: 0
+ ComparisonFunc: LessEqual
+ BorderColor: TransparentBlack
MinLOD: 4.56
MaxLOD: 8.90
ShaderRegister: 31
RegisterSpace: 32
- ShaderVisibility: 7
+ ShaderVisibility: Mesh
AllowInputAssemblerInputLayout: true
DenyGeometryShaderRootAccess: true
)"));
diff --git a/llvm/unittests/Support/DebugLogTest.cpp b/llvm/unittests/Support/DebugLogTest.cpp
index e087705..da3851e 100644
--- a/llvm/unittests/Support/DebugLogTest.cpp
+++ b/llvm/unittests/Support/DebugLogTest.cpp
@@ -27,7 +27,7 @@ TEST(DebugLogTest, Basic) {
{
std::string str;
raw_string_ostream os(str);
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, nullptr) << "NoType";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, "", 0) << "NoType";
EXPECT_FALSE(StringRef(os.str()).starts_with('['));
EXPECT_TRUE(StringRef(os.str()).ends_with("NoType\n"));
}
@@ -36,8 +36,8 @@ TEST(DebugLogTest, Basic) {
{
std::string str;
raw_string_ostream os(str);
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A";
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, "B", 0) << "B";
EXPECT_TRUE(StringRef(os.str()).starts_with('['));
EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), HasSubstr("B\n")));
}
@@ -48,18 +48,18 @@ TEST(DebugLogTest, Basic) {
raw_string_ostream os(str);
// Just check that the macro doesn't result in dangling else.
if (true)
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A";
else
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "B";
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "B";
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << "B";
EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), Not(HasSubstr("B\n"))));
int count = 0;
auto inc = [&]() { return ++count; };
EXPECT_THAT(count, Eq(0));
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << inc();
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << inc();
EXPECT_THAT(count, Eq(1));
- DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << inc();
+ LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << inc();
EXPECT_THAT(count, Eq(1));
}
}
@@ -75,7 +75,7 @@ TEST(DebugLogTest, BasicWithLevel) {
raw_string_ostream os(str);
for (auto type : {"A", "B", "C", "D"})
for (int level : llvm::seq<int>(0, 4))
- DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level)
+ LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level)
<< level;
EXPECT_EQ(os.str(), "[A:0] A:0 0\n[A:1] A:1 1\n[A:2] A:2 2\n[A:3] A:3 "
"3\n[B:0] B:0 0\n[B:1] B:1 1\n[C:0] C:0 0\n");
@@ -92,7 +92,7 @@ TEST(DebugLogTest, NegativeLevel) {
raw_string_ostream os(str);
for (auto type : {"A", "B"})
for (int level : llvm::seq<int>(0, 2))
- DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level)
+ LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level)
<< level;
EXPECT_EQ(os.str(), "[A:0] A:0 0\n[B:0] B:0 0\n[B:1] B:1 1\n");
}
@@ -128,6 +128,115 @@ TEST(DebugLogTest, DestructorPrefix) {
// After destructors, nothing should have been printed.
EXPECT_EQ(os.str(), "");
}
+
+TEST(DebugLogTest, LDBG_MACROS) {
+ llvm::DebugFlag = true;
+ static const char *DT[] = {"A:3", "B:2"};
+ setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0]));
+ std::string Str;
+ raw_string_ostream DebugOs(Str);
+ std::string StrExpected;
+ raw_string_ostream ExpectedOs(StrExpected);
+#undef LDBG_STREAM
+#define LDBG_STREAM DebugOs
+#define DEBUG_TYPE "A"
+ LDBG() << "Hello, world!";
+ ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a level, no type.
+ LDBG(2) << "Hello, world!";
+ ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+// Now the type will be explicit, check we don't use DEBUG_TYPE.
+#undef DEBUG_TYPE
+
+ // Test with a type
+ LDBG("B") << "Hello, world!";
+ ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a type and a level
+ LDBG("B", 2) << "Hello, world!";
+ ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a type not enabled.
+ LDBG("C", 1) << "Hello, world!";
+ EXPECT_EQ(DebugOs.str(), "");
+
+ // Test with a level not enabled.
+ LDBG("B", 3) << "Hello, world!";
+ EXPECT_EQ(DebugOs.str(), "");
+}
+
+TEST(DebugLogTest, LDBG_OS_MACROS) {
+ llvm::DebugFlag = true;
+ static const char *DT[] = {"A:3", "B:2"};
+ setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0]));
+ std::string Str;
+ raw_string_ostream DebugOs(Str);
+ std::string StrExpected;
+ raw_string_ostream ExpectedOs(StrExpected);
+#undef LDBG_STREAM
+#define LDBG_STREAM DebugOs
+#define DEBUG_TYPE "A"
+ LDBG_OS([](raw_ostream &Os) { Os << "Hello, world!"; });
+ ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a level, no type.
+ LDBG_OS(2, [](raw_ostream &Os) { Os << "Hello, world!"; });
+ ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+// Now the type will be explicit, check we don't use DEBUG_TYPE.
+#undef DEBUG_TYPE
+
+ // Test with a type.
+ LDBG_OS("B", [](raw_ostream &Os) { Os << "Hello, world!"; });
+ ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a type and a level
+ LDBG_OS("B", 2, [](raw_ostream &Os) { Os << "Hello, world!"; });
+ ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1)
+ << " Hello, world!\n";
+ EXPECT_EQ(DebugOs.str(), ExpectedOs.str());
+ Str.clear();
+ StrExpected.clear();
+
+ // Test with a type not enabled.
+ LDBG_OS("C", 1, [](raw_ostream &Os) { Os << "Hello, world!"; });
+ EXPECT_EQ(DebugOs.str(), "");
+
+ // Test with a level not enabled.
+ LDBG_OS("B", 3, [](raw_ostream &Os) { Os << "Hello, world!"; });
+ EXPECT_EQ(DebugOs.str(), "");
+}
+
#else
TEST(DebugLogTest, Basic) {
// LDBG should be compiled out in NDEBUG, so just check it compiles and has
diff --git a/llvm/unittests/Support/EndianTest.cpp b/llvm/unittests/Support/EndianTest.cpp
index 59281c0..c48b770 100644
--- a/llvm/unittests/Support/EndianTest.cpp
+++ b/llvm/unittests/Support/EndianTest.cpp
@@ -201,26 +201,26 @@ TEST(Endian, WriteBitAligned) {
TEST(Endian, Write) {
unsigned char data[5];
- endian::write<int32_t, llvm::endianness::big, unaligned>(data, -1362446643);
+ endian::write<int32_t, unaligned>(data, -1362446643, llvm::endianness::big);
EXPECT_EQ(data[0], 0xAE);
EXPECT_EQ(data[1], 0xCA);
EXPECT_EQ(data[2], 0xB6);
EXPECT_EQ(data[3], 0xCD);
- endian::write<int32_t, llvm::endianness::big, unaligned>(data + 1,
- -1362446643);
+ endian::write<int32_t, unaligned>(data + 1, -1362446643,
+ llvm::endianness::big);
EXPECT_EQ(data[1], 0xAE);
EXPECT_EQ(data[2], 0xCA);
EXPECT_EQ(data[3], 0xB6);
EXPECT_EQ(data[4], 0xCD);
- endian::write<int32_t, llvm::endianness::little, unaligned>(data,
- -1362446643);
+ endian::write<int32_t, unaligned>(data, -1362446643,
+ llvm::endianness::little);
EXPECT_EQ(data[0], 0xCD);
EXPECT_EQ(data[1], 0xB6);
EXPECT_EQ(data[2], 0xCA);
EXPECT_EQ(data[3], 0xAE);
- endian::write<int32_t, llvm::endianness::little, unaligned>(data + 1,
- -1362446643);
+ endian::write<int32_t, unaligned>(data + 1, -1362446643,
+ llvm::endianness::little);
EXPECT_EQ(data[1], 0xCD);
EXPECT_EQ(data[2], 0xB6);
EXPECT_EQ(data[3], 0xCA);
diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
index febd06e..e953c0d 100644
--- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
@@ -1184,6 +1184,7 @@ R"(All available -march extensions for RISC-V
Experimental extensions
p 0.15
+ zibi 0.1
zicfilp 1.0 This is a long dummy description
zicfiss 1.0
zalasr 0.1
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 09d29b8..a702838 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -629,21 +629,24 @@ static constexpr uint16_t IntrinsicsToAttributesMap[] = {)";
UniqAttributes.try_emplace(&Int, ID);
}
- constexpr uint16_t NoFunctionAttrsID = 255;
- if (UniqAttributes.size() > 256)
- PrintFatalError("Too many unique argument attributes for table!");
- // Note, ID 255 is used to indicate no function attributes.
- if (UniqFnAttributes.size() > 255)
- PrintFatalError("Too many unique function attributes for table!");
-
- // Assign a 16-bit packed ID for each intrinsic. The lower 8-bits will be its
- // "argument attribute ID" (index in UniqAttributes) and upper 8 bits will be
+ const uint8_t UniqAttributesBitSize = Log2_32_Ceil(UniqAttributes.size() + 1);
+ // Note, ID `-1` is used to indicate no function attributes.
+ const uint8_t UniqFnAttributesBitSize =
+ Log2_32_Ceil(UniqFnAttributes.size() + 2);
+ const uint16_t NoFunctionAttrsID =
+ maskTrailingOnes<uint16_t>(UniqFnAttributesBitSize);
+ if (UniqAttributesBitSize + UniqFnAttributesBitSize > 16)
+ PrintFatalError(
+ "More than 16 bits are used for IntrinsicsToAttributesMap's entry!");
+
+ // Assign a 16-bit packed ID for each intrinsic. The lower bits will be its
+ // "argument attribute ID" (index in UniqAttributes) and upper bits will be
// its "function attribute ID" (index in UniqFnAttributes).
for (const CodeGenIntrinsic &Int : Ints) {
uint16_t FnAttrIndex =
hasFnAttributes(Int) ? UniqFnAttributes[&Int] : NoFunctionAttrsID;
- OS << formatv("\n {} << 8 | {}, // {}", FnAttrIndex,
- UniqAttributes[&Int], Int.Name);
+ OS << formatv("\n {} << {} | {}, // {}", FnAttrIndex,
+ UniqAttributesBitSize, UniqAttributes[&Int], Int.Name);
}
OS << R"(
@@ -749,8 +752,8 @@ AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id,
return AttributeList();
uint16_t PackedID = IntrinsicsToAttributesMap[id - 1];
- uint8_t FnAttrID = PackedID >> 8;
- uint8_t ArgAttrID = PackedID & 0xFF;
+ uint16_t FnAttrID = PackedID >> ({});
+ uint16_t ArgAttrID = PackedID & ({});
using PairTy = std::pair<unsigned, AttributeSet>;
alignas(PairTy) char ASStorage[sizeof(PairTy) * {}];
PairTy *AS = reinterpret_cast<PairTy *>(ASStorage);
@@ -772,10 +775,20 @@ AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id,
}
return AttributeList::get(C, ArrayRef(AS, NumAttrs));
}
+
+AttributeSet Intrinsic::getFnAttributes(LLVMContext &C, ID id) {
+ if (id == 0)
+ return AttributeSet();
+ uint16_t PackedID = IntrinsicsToAttributesMap[id - 1];
+ uint16_t FnAttrID = PackedID >> ({});
+ return getIntrinsicFnAttributeSet(C, FnAttrID);
+}
#endif // GET_INTRINSIC_ATTRIBUTES
)",
- MaxNumAttrs, NoFunctionAttrsID);
+ UniqAttributesBitSize,
+ maskTrailingOnes<uint16_t>(UniqAttributesBitSize), MaxNumAttrs,
+ NoFunctionAttrsID, UniqAttributesBitSize);
}
void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 8747d02..a8a9036 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/KnownBits.h"
@@ -169,8 +170,6 @@ struct OperandInfo {
Fields.emplace_back(Base, Width, Offset);
}
- unsigned numFields() const { return Fields.size(); }
-
ArrayRef<EncodingField> fields() const { return Fields; }
};
@@ -1104,31 +1103,29 @@ void DecoderTableBuilder::emitBinaryParser(raw_ostream &OS, indent Indent,
return;
}
- if (OpInfo.Fields.empty() && OpInfo.InitValue && IgnoreFullyDefinedOperands)
- return;
-
- // We need to construct the encoding of the operand from pieces if it is not
- // encoded sequentially or has a non-zero constant part in the encoding.
- bool UseInsertBits = OpInfo.numFields() > 1 || OpInfo.InitValue.value_or(0);
-
- if (UseInsertBits) {
- OS << Indent << "tmp = 0x";
- OS.write_hex(OpInfo.InitValue.value_or(0));
- OS << ";\n";
- }
-
- for (const auto &[Base, Width, Offset] : OpInfo.fields()) {
- OS << Indent;
- if (UseInsertBits)
- OS << "insertBits(tmp, ";
- else
- OS << "tmp = ";
- OS << "fieldFromInstruction(insn, " << Base << ", " << Width << ')';
- if (UseInsertBits)
- OS << ", " << Offset << ", " << Width << ')';
- else if (Offset != 0)
+ if (OpInfo.fields().empty()) {
+ // Only a constant part. The old behavior is to not decode this operand.
+ if (IgnoreFullyDefinedOperands)
+ return;
+ // Initialize `tmp` with the constant part.
+ OS << Indent << "tmp = " << format_hex(*OpInfo.InitValue, 0) << ";\n";
+ } else if (OpInfo.fields().size() == 1 && !OpInfo.InitValue.value_or(0)) {
+ // One variable part and no/zero constant part. Initialize `tmp` with the
+ // variable part.
+ auto [Base, Width, Offset] = OpInfo.fields().front();
+ OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
+ << ')';
+ if (Offset)
OS << " << " << Offset;
OS << ";\n";
+ } else {
+ // General case. Initialize `tmp` with the constant part, if any, and
+ // insert the variable parts into it.
+ OS << Indent << "tmp = " << format_hex(OpInfo.InitValue.value_or(0), 0)
+ << ";\n";
+ for (auto [Base, Width, Offset] : OpInfo.fields())
+ OS << Indent << "insertBits(tmp, fieldFromInstruction(insn, " << Base
+ << ", " << Width << "), " << Offset << ", " << Width << ");\n";
}
StringRef Decoder = OpInfo.Decoder;
diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
index 14e6671..b0c2ca3 100644
--- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
@@ -16,6 +16,7 @@ unittest("CoreTests") {
]
sources = [
"BinaryContext.cpp",
+ "ClusteredRows.cpp",
"DynoStats.cpp",
"MCPlusBuilder.cpp",
"MemoryMaps.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn
index 93c0f3c..57e9300 100644
--- a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn
@@ -21,6 +21,7 @@ static_library("Format") {
"MacroExpander.cpp",
"MatchFilePath.cpp",
"NamespaceEndCommentsFixer.cpp",
+ "NumericLiteralCaseFixer.cpp",
"NumericLiteralInfo.cpp",
"ObjCPropertyAttributeOrderFixer.cpp",
"QualifierAlignmentFixer.cpp",
diff --git a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn
index 88521a8..c501f12 100644
--- a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn
@@ -38,6 +38,7 @@ unittest("FormatTests") {
"MacroExpanderTest.cpp",
"MatchFilePathTest.cpp",
"NamespaceEndCommentsFixerTest.cpp",
+ "NumericLiteralCaseTest.cpp",
"NumericLiteralInfoTest.cpp",
"ObjCPropertyAttributeOrderFixerTest.cpp",
"QualifierFixerTest.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn
index 183fa57..73ed834 100644
--- a/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn
@@ -17,6 +17,7 @@ static_library("TargetParser") {
"RISCVISAInfo.cpp",
"RISCVTargetParser.cpp",
"SubtargetFeature.cpp",
+ "TargetDataLayout.cpp",
"TargetParser.cpp",
"Triple.cpp",
"X86TargetParser.cpp",
diff --git a/llvm/utils/lit/CMakeLists.txt b/llvm/utils/lit/CMakeLists.txt
index d22a778..97b1d7c 100644
--- a/llvm/utils/lit/CMakeLists.txt
+++ b/llvm/utils/lit/CMakeLists.txt
@@ -22,7 +22,7 @@ add_custom_target(prepare-check-lit
# Add rules for lit's own test suite
add_lit_testsuite(check-lit "Running lit's tests"
${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS "FileCheck" "not" "prepare-check-lit"
+ DEPENDS "FileCheck" "not" "split-file" "prepare-check-lit"
)
# For IDEs
diff --git a/llvm/utils/lit/lit/DiffUpdater.py b/llvm/utils/lit/lit/DiffUpdater.py
index fefcdcc..a29c46f 100644
--- a/llvm/utils/lit/lit/DiffUpdater.py
+++ b/llvm/utils/lit/lit/DiffUpdater.py
@@ -62,17 +62,19 @@ class SplitFileTarget:
@staticmethod
def get_target_dir(commands, test_path):
+ # posix=True breaks Windows paths because \ is treated as an escaping character
for cmd in commands:
- split = shlex.split(cmd)
+ split = shlex.split(cmd, posix=False)
if "split-file" not in split:
continue
start_idx = split.index("split-file")
split = split[start_idx:]
if len(split) < 3:
continue
- if split[1].strip() != test_path:
+ p = unquote(split[1].strip())
+ if not test_path.samefile(p):
continue
- return split[2].strip()
+ return unquote(split[2].strip())
return None
@staticmethod
@@ -104,6 +106,12 @@ class SplitFileTarget:
return l.rstrip()
+def unquote(s):
+ if len(s) > 1 and s[0] == s[-1] and (s[0] == '"' or s[0] == "'"):
+ return s[1:-1]
+ return s
+
+
def get_source_and_target(a, b, test_path, commands):
"""
Try to figure out which file is the test output and which is the reference.
@@ -145,7 +153,7 @@ def diff_test_updater(result, test, commands):
[cmd, a, b] = args
if cmd != "diff":
return None
- res = get_source_and_target(a, b, test.getFilePath(), commands)
+ res = get_source_and_target(a, b, pathlib.Path(test.getFilePath()), commands)
if not res:
return f"update-diff-test: could not deduce source and target from {a} and {b}"
source, target = res
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index a769919..90c2c64 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -92,11 +92,12 @@ class ShellEnvironment(object):
we maintain a dir stack for pushd/popd.
"""
- def __init__(self, cwd, env, umask=-1):
+ def __init__(self, cwd, env, umask=-1, ulimit={}):
self.cwd = cwd
self.env = dict(env)
self.umask = umask
self.dirStack = []
+ self.ulimit = ulimit
def change_dir(self, newdir):
if os.path.isabs(newdir):
@@ -595,6 +596,27 @@ def executeBuiltinUmask(cmd, shenv):
return ShellCommandResult(cmd, "", "", 0, False)
+def executeBuiltinUlimit(cmd, shenv):
+ """executeBuiltinUlimit - Change the current limits."""
+ if os.name != "posix":
+ raise InternalShellError(cmd, "'ulimit' not supported on this system")
+ if len(cmd.args) != 3:
+ raise InternalShellError(cmd, "'ulimit' requires two arguments")
+ try:
+ new_limit = int(cmd.args[2])
+ except ValueError as err:
+ raise InternalShellError(cmd, "Error: 'ulimit': %s" % str(err))
+ if cmd.args[1] == "-v":
+ shenv.ulimit["RLIMIT_AS"] = new_limit * 1024
+ elif cmd.args[1] == "-n":
+ shenv.ulimit["RLIMIT_NOFILE"] = new_limit
+ else:
+ raise InternalShellError(
+ cmd, "'ulimit' does not support option: %s" % cmd.args[1]
+ )
+ return ShellCommandResult(cmd, "", "", 0, False)
+
+
def executeBuiltinColon(cmd, cmd_shenv):
"""executeBuiltinColon - Discard arguments and exit with status 0."""
return ShellCommandResult(cmd, "", "", 0, False)
@@ -749,6 +771,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
"popd": executeBuiltinPopd,
"pushd": executeBuiltinPushd,
"rm": executeBuiltinRm,
+ "ulimit": executeBuiltinUlimit,
"umask": executeBuiltinUmask,
":": executeBuiltinColon,
}
@@ -914,6 +937,19 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
if kIsWindows:
args = quote_windows_command(args)
+ # Handle any resource limits. We do this by launching the command with
+ # a wrapper that sets the necessary limits. We use a wrapper rather than
+ # setting the limits in process as we cannot reraise the limits back to
+ # their defaults without elevated permissions.
+ if cmd_shenv.ulimit:
+ executable = sys.executable
+ args.insert(0, sys.executable)
+ args.insert(1, os.path.join(builtin_commands_dir, "_launch_with_limit.py"))
+ for limit in cmd_shenv.ulimit:
+ cmd_shenv.env["LIT_INTERNAL_ULIMIT_" + limit] = str(
+ cmd_shenv.ulimit[limit]
+ )
+
try:
# TODO(boomanaiden154): We currently wrap the subprocess.Popen with
# os.umask as the umask argument in subprocess.Popen is not
diff --git a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py
new file mode 100644
index 0000000..33d2d59
--- /dev/null
+++ b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py
@@ -0,0 +1,25 @@
+import sys
+import subprocess
+import resource
+import os
+
+ULIMIT_ENV_VAR_PREFIX = "LIT_INTERNAL_ULIMIT_"
+
+
+def main(argv):
+ command_args = argv[1:]
+ for env_var in os.environ:
+ if env_var.startswith(ULIMIT_ENV_VAR_PREFIX):
+ limit_str = env_var[len(ULIMIT_ENV_VAR_PREFIX) :]
+ limit_value = int(os.environ[env_var])
+ limit = (limit_value, limit_value)
+ if limit_str == "RLIMIT_AS":
+ resource.setrlimit(resource.RLIMIT_AS, limit)
+ elif limit_str == "RLIMIT_NOFILE":
+ resource.setrlimit(resource.RLIMIT_NOFILE, limit)
+ process_output = subprocess.run(command_args)
+ sys.exit(process_output.returncode)
+
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg
new file mode 100644
index 0000000..c7bdc7e
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg
@@ -0,0 +1,8 @@
+import lit.formats
+
+config.name = "shtest-ulimit"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest(execute_external=False)
+config.test_source_root = None
+config.test_exec_root = None
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable)))
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py
new file mode 100644
index 0000000..632f954
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py
@@ -0,0 +1,4 @@
+import resource
+
+print("RLIMIT_AS=" + str(resource.getrlimit(resource.RLIMIT_AS)[0]))
+print("RLIMIT_NOFILE=" + str(resource.getrlimit(resource.RLIMIT_NOFILE)[0]))
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt
new file mode 100644
index 0000000..efa2288
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt
@@ -0,0 +1 @@
+# RUN: ulimit -n
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt
new file mode 100644
index 0000000..ad353b5
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt
@@ -0,0 +1,5 @@
+# RUN: ulimit -v 1048576
+# RUN: ulimit -n 50
+# RUN: %{python} %S/print_limits.py
+# Fail the test so that we can assert on the output.
+# RUN: not echo return
diff --git a/llvm/utils/lit/tests/diff-test-update.py b/llvm/utils/lit/tests/diff-test-update.py
index ad14034..8b9f461 100644
--- a/llvm/utils/lit/tests/diff-test-update.py
+++ b/llvm/utils/lit/tests/diff-test-update.py
@@ -8,13 +8,13 @@
# RUN: not %{lit} --update-tests -v %S/Inputs/diff-test-update | FileCheck %s
-# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file.test
-# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file-populated.test
-# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file.test
-# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file-populated.test
-# RUN: diff %S/Inputs/diff-test-update/single-split-file-no-expected.out %S/Inputs/diff-test-update/single-split-file-no-expected.test
-# RUN: diff %S/Inputs/diff-test-update/split-c-comments.out %S/Inputs/diff-test-update/split-c-comments.test
-# RUN: diff %S/Inputs/diff-test-update/split-whitespace.out "%S/Inputs/diff-test-update/split whitespace.test"
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file-populated.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file-populated.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file-no-expected.out %S/Inputs/diff-test-update/single-split-file-no-expected.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/split-c-comments.out %S/Inputs/diff-test-update/split-c-comments.test
+# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/split-whitespace.out "%S/Inputs/diff-test-update/split whitespace.test"
# CHECK: # update-diff-test: could not deduce source and target from {{.*}}1.in and {{.*}}2.in
diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py
new file mode 100644
index 0000000..b86578a
--- /dev/null
+++ b/llvm/utils/lit/tests/shtest-ulimit.py
@@ -0,0 +1,24 @@
+# Check the ulimit command
+
+# ulimit does not work on non-POSIX platforms.
+# UNSUPPORTED: system-windows
+
+# TODO(boomanaiden154): The test fails on some non-Linux POSIX
+# platforms (like MacOS) due to the underlying system not supporting
+# ulimit -v. This test needs to be carved up so we keep full test
+# coverage on Linux and as much as possible on other platforms.
+# REQUIRES: system-linux
+
+# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s
+
+# CHECK: -- Testing: 2 tests{{.*}}
+
+# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}})
+# CHECK: ulimit -n
+# CHECK: 'ulimit' requires two arguments
+
+# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}})
+# CHECK: ulimit -v 1048576
+# CHECK: ulimit -n 50
+# CHECK: RLIMIT_AS=1073741824
+# CHECK: RLIMIT_NOFILE=50
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index e1ee7c3..9d170b3 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -830,7 +830,6 @@ Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll
Transforms/IndVarSimplify/pr45835.ll
Transforms/IndVarSimplify/preserving-debugloc-rem-div.ll
Transforms/Inline/optimization-remarks-hotness-threshold.ll
-Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
Transforms/InstCombine/2011-02-14-InfLoop.ll
diff --git a/mlir/Maintainers.md b/mlir/Maintainers.md
index 02e93eb..5d3b576c 100644
--- a/mlir/Maintainers.md
+++ b/mlir/Maintainers.md
@@ -46,7 +46,7 @@ dialects, build system and language bindings.
* ‘ptr’ Dialect ([fabianmcg](https://github.com/fabianmcg))
#### Basic Compute Dialects
-* ‘arith’ Dialect (core)
+* ‘arith’ Dialect (core + [kuhar](https://github.com/kuhar))
* ‘math’ Dialect (core)
* Rewrite System Dialects (core)
* Transform Dialect ([martin-luecke](https://github.com/martin-luecke), [ftynse](https://github.com/ftynse), [rolfmorel](https://github.com/rolfmorel))
diff --git a/mlir/docs/Tutorials/transform/Ch0.md b/mlir/docs/Tutorials/transform/Ch0.md
index dc4b753..0d7a7036 100644
--- a/mlir/docs/Tutorials/transform/Ch0.md
+++ b/mlir/docs/Tutorials/transform/Ch0.md
@@ -134,7 +134,7 @@ Furthermore, the operation now contains a region that explicitly specifies the m
## “Loop” Fusion
-Since the region of the `linalg.generic` operation can contain arbitrarily many operations, we can use it to express “fusion” of the implicit loops by simply having more operations chained in the region. For example, the common machine learning rectified linear unit layer (ReLU), which can be defined as `relu(x) = max(0, x)`, can be defined be expressed using the “compare-and-select” idiom in one `linalg.generic` operation, without the temporary buffer for the comparison result and without repeating the outer operation:
+Since the region of the `linalg.generic` operation can contain arbitrarily many operations, we can use it to express “fusion” of the implicit loops by simply having more operations chained in the region. For example, the common machine learning rectified linear unit layer (ReLU), which can be defined as `relu(x) = max(0, x)`, can be expressed using the “compare-and-select” idiom in one `linalg.generic` operation, without the temporary buffer for the comparison result and without repeating the outer operation:
```mlir
linalg.generic {
diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt
index 88dfa3e..03627c0 100644
--- a/mlir/examples/standalone/CMakeLists.txt
+++ b/mlir/examples/standalone/CMakeLists.txt
@@ -60,5 +60,7 @@ if(MLIR_ENABLE_BINDINGS_PYTHON)
endif()
add_subdirectory(test)
add_subdirectory(standalone-opt)
-add_subdirectory(standalone-plugin)
+if(NOT WIN32)
+ add_subdirectory(standalone-plugin)
+endif()
add_subdirectory(standalone-translate)
diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt
index a0eca9c..1ab27ce 100644
--- a/mlir/examples/standalone/python/CMakeLists.txt
+++ b/mlir/examples/standalone/python/CMakeLists.txt
@@ -26,6 +26,8 @@ declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension
ADD_TO_PARENT StandalonePythonSources
SOURCES
StandaloneExtensionPybind11.cpp
+ PRIVATE_LINK_LIBS
+ LLVMSupport
EMBED_CAPI_LINK_LIBS
StandaloneCAPI
PYTHON_BINDINGS_LIBRARY pybind11
@@ -36,6 +38,8 @@ declare_mlir_python_extension(StandalonePythonSources.NanobindExtension
ADD_TO_PARENT StandalonePythonSources
SOURCES
StandaloneExtensionNanobind.cpp
+ PRIVATE_LINK_LIBS
+ LLVMSupport
EMBED_CAPI_LINK_LIBS
StandaloneCAPI
PYTHON_BINDINGS_LIBRARY nanobind
diff --git a/mlir/examples/standalone/test/CMakeLists.txt b/mlir/examples/standalone/test/CMakeLists.txt
index fdde159..8864563 100644
--- a/mlir/examples/standalone/test/CMakeLists.txt
+++ b/mlir/examples/standalone/test/CMakeLists.txt
@@ -14,8 +14,10 @@ set(STANDALONE_TEST_DEPENDS
standalone-capi-test
standalone-opt
standalone-translate
- StandalonePlugin
)
+if(NOT WIN32)
+ list(APPEND STANDALONE_TEST_DEPENDS StandalonePlugin)
+endif()
if(MLIR_ENABLE_BINDINGS_PYTHON)
list(APPEND STANDALONE_TEST_DEPENDS StandalonePythonModules)
endif()
diff --git a/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir b/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir
index 1d652dc..3020097 100644
--- a/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir
+++ b/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir
@@ -1,3 +1,4 @@
+// UNSUPPORTED: system-windows
// RUN: mlir-opt %s --load-pass-plugin=%standalone_libs/StandalonePlugin%shlibext --pass-pipeline="builtin.module(standalone-switch-bar-foo)" | FileCheck %s
module {
diff --git a/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir b/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir
index 468932b..900b524 100644
--- a/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir
+++ b/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir
@@ -1,3 +1,4 @@
+// UNSUPPORTED: system-windows
// RUN: mlir-opt %s --load-dialect-plugin=%standalone_libs/StandalonePlugin%shlibext --pass-pipeline="builtin.module(standalone-switch-bar-foo)" | FileCheck %s
module {
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
index 06fb851..4d19fa5 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
@@ -201,9 +201,6 @@ class ArmSME_IntrCountOp<string mnemonic>
/*traits*/[PredOpTrait<"`res` is i64", TypeIsPred<"res", I64>>],
/*numResults=*/1, /*overloadedResults=*/[]>;
-def LLVM_aarch64_sme_cntsb : ArmSME_IntrCountOp<"cntsb">;
-def LLVM_aarch64_sme_cntsh : ArmSME_IntrCountOp<"cntsh">;
-def LLVM_aarch64_sme_cntsw : ArmSME_IntrCountOp<"cntsw">;
def LLVM_aarch64_sme_cntsd : ArmSME_IntrCountOp<"cntsd">;
#endif // ARMSME_INTRINSIC_OPS
diff --git a/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h b/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h
index 1f40eb6..b57b27d 100644
--- a/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h
@@ -32,6 +32,9 @@ namespace mlir::arm_sme {
constexpr unsigned MinStreamingVectorLengthInBits = 128;
+/// Return the size represented by arm_sme::TypeSize in bytes.
+unsigned getSizeInBytes(TypeSize type);
+
/// Return minimum number of elements for the given element `type` in
/// a vector of SVL bits.
unsigned getSMETileSliceMinNumElts(Type type);
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index 88df541..d3c01c3 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -654,7 +654,7 @@ def ForallOp : SCF_Op<"forall", [
def InParallelOp : SCF_Op<"forall.in_parallel", [
Pure,
Terminator,
- DeclareOpInterfaceMethods<ParallelCombiningOpInterface>,
+ DeclareOpInterfaceMethods<InParallelOpInterface>,
HasParent<"ForallOp">,
] # GraphRegionNoTerminator.traits> {
let summary = "terminates a `forall` block";
@@ -679,8 +679,6 @@ def InParallelOp : SCF_Op<"forall.in_parallel", [
OpBuilder<(ins)>,
];
- // TODO: Add a `InParallelOpInterface` interface for ops that can
- // appear inside in_parallel.
let extraClassDeclaration = [{
::llvm::SmallVector<::mlir::BlockArgument> getDests();
::llvm::iterator_range<::mlir::Block::iterator> getYieldingOps();
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
index 7d396e5..2453cf5 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -1470,24 +1470,25 @@ def Tensor_PadOp : Tensor_Op<"pad", [
// ParallelInsertSliceOp
//===----------------------------------------------------------------------===//
-// TODO: Implement InParallelOpInterface.
def Tensor_ParallelInsertSliceOp : Tensor_Op<"parallel_insert_slice", [
AttrSizedOperandSegments,
OffsetSizeAndStrideOpInterface,
+ DeclareOpInterfaceMethods<ParallelCombiningOpInterface,
+ ["getUpdatedDestinations", "getIteratingParent"]>,
// TODO: Cannot use an interface here atm, verify this manually for now.
- // HasParent<"ParallelCombiningOpInterface">
+ // HasParent<"InParallelOpInterface">
]> {
let summary = [{
Specify the tensor slice update of a single thread of a parent
- ParallelCombiningOpInterface op.
+ InParallelOpInterface op.
}];
let description = [{
The `parallel_insert_slice` yields a subset tensor value to its parent
- ParallelCombiningOpInterface. These subset tensor values are aggregated to
+ InParallelOpInterface. These subset tensor values are aggregated to
in some unspecified order into a full tensor value returned by the parent
parallel iterating op.
The `parallel_insert_slice` is one such op allowed in the
- ParallelCombiningOpInterface op.
+ InParallelOpInterface op.
Conflicting writes result in undefined semantics, in that the indices written
to by multiple parallel updates might contain data from any of the updates,
@@ -1569,8 +1570,8 @@ def Tensor_ParallelInsertSliceOp : Tensor_Op<"parallel_insert_slice", [
return ::llvm::cast<RankedTensorType>(getDest().getType());
}
- ParallelCombiningOpInterface getParallelCombiningParent() {
- return dyn_cast<ParallelCombiningOpInterface>(
+ InParallelOpInterface getParallelCombiningParent() {
+ return dyn_cast<InParallelOpInterface>(
getOperation()->getParentOp());
}
diff --git a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td
index 72a69a0..03d2550 100644
--- a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td
+++ b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td
@@ -85,6 +85,20 @@ def ApplyDropUnitDimWithShapeCastPatternsOp : Op<Transform_Dialect,
let assemblyFormat = "attr-dict";
}
+def ApplyDropInnerMostUnitDimsFromXferOpsPatternsOp : Op<Transform_Dialect,
+ "apply_patterns.vector.drop_inner_most_unit_dims_from_xfer_ops",
+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let description = [{
+ Apply vector patterns to drop the inner most unit dims from
+ vector.transfer_read and vector.transfer_write Ops by taking a subview (via
+ memref.subview) of the original source/destination MemRef. Since it
+ requires the input/ouptu to be MemRefs, this Op is only helpful
+ past-bufferization.
+ }];
+
+ let assemblyFormat = "attr-dict";
+}
+
def ApplyTransferPermutationPatternsOp : Op<Transform_Dialect,
"apply_patterns.vector.transfer_permutation_patterns",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index cfe3e80..1f1d367 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -194,26 +194,29 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
InterfaceMethod<"Get the num of effective subgroups",
"int64_t",
"getNumSubgroups", (ins), [{
- std::optional<SmallVector<int64_t>> sgLayout = llvm::cast<ConcreteAttr>(tablegen_opaque_val).getSgLayoutAsInt();
+ std::optional<SmallVector<int64_t>> sgLayout = llvm::cast<ConcreteAttr>(tablegen_opaque_val).getEffectiveSgLayoutAsInt();
if (sgLayout.has_value())
return computeProduct(*sgLayout);
return 0;
}], [{}]>,
- InterfaceMethod<"Get the SgLayout field of the attribute as integer array",
+ InterfaceMethod<"Get the order of the layout attribute",
+ "DenseI32ArrayAttr",
+ "getOrder">,
+ InterfaceMethod<"Get the effective SgLayout of the layout attribute as integer array",
"SmallVector<int64_t>",
- "getSgLayoutAsInt">,
- InterfaceMethod<"Get the SgData field of the attribute as integer array",
+ "getEffectiveSgLayoutAsInt">,
+ InterfaceMethod<"Get the effective SgData of the layout attribute as integer array",
"SmallVector<int64_t>",
- "getSgDataAsInt">,
- InterfaceMethod<"Get the InstData field of the attribute as integer array",
+ "getEffectiveSgDataAsInt">,
+ InterfaceMethod<"Get the effective InstData of the layout attribute as integer array",
"SmallVector<int64_t>",
- "getInstDataAsInt">,
- InterfaceMethod<"Get the LaneLayout field of the attribute as integer array",
+ "getEffectiveInstDataAsInt">,
+ InterfaceMethod<"Get the effective LaneLayout of the layout attribute as integer array",
"SmallVector<int64_t>",
- "getLaneLayoutAsInt">,
- InterfaceMethod<"Get the LaneData field of the attribute as integer array",
+ "getEffectiveLaneLayoutAsInt">,
+ InterfaceMethod<"Get the effective LaneData of the layout attribute as integer array",
"SmallVector<int64_t>",
- "getLaneDataAsInt">,
+ "getEffectiveLaneDataAsInt">,
InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
"xegpu::DistributeLayoutAttr",
"dropSgLayoutAndData">,
@@ -231,7 +234,11 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
multiple blocks according to round-robin distribution rules.}],
"FailureOr<SmallVector<SmallVector<Value>>>",
"getOffsets",
- (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)>
+ (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)>,
+ InterfaceMethod</*desc=*/[{Check if this layout is a slice of some other layout.}],
+ /*retTy=*/"bool",
+ /*methodName=*/"isSliceOf",
+ /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other)>
];
}
@@ -391,31 +398,31 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
getLaneLayout(), getLaneData(), getOrder());
}
- SmallVector<int64_t> getSgLayoutAsInt() const {
+ SmallVector<int64_t> getEffectiveSgLayoutAsInt() const {
if (DenseI32ArrayAttr layout = getSgLayout())
return llvm::to_vector_of<int64_t>(layout.asArrayRef());
return {};
}
- SmallVector<int64_t> getSgDataAsInt() const {
+ SmallVector<int64_t> getEffectiveSgDataAsInt() const {
if (DenseI32ArrayAttr data = getSgData())
return llvm::to_vector_of<int64_t>(data.asArrayRef());
return {};
}
- SmallVector<int64_t> getInstDataAsInt() const {
+ SmallVector<int64_t> getEffectiveInstDataAsInt() const {
if (DenseI32ArrayAttr inst = getInstData())
return llvm::to_vector_of<int64_t>(inst.asArrayRef());
return {};
}
- SmallVector<int64_t> getLaneLayoutAsInt() const {
+ SmallVector<int64_t> getEffectiveLaneLayoutAsInt() const {
if (DenseI32ArrayAttr layout = getLaneLayout())
return llvm::to_vector_of<int64_t>(layout.asArrayRef());
return {};
}
- SmallVector<int64_t> getLaneDataAsInt() const {
+ SmallVector<int64_t> getEffectiveLaneDataAsInt() const {
if (DenseI32ArrayAttr data = getLaneData())
return llvm::to_vector_of<int64_t>(data.asArrayRef());
return {};
@@ -433,6 +440,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
FailureOr<SmallVector<SmallVector<Value>>>
getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape);
+ /// Check if this is slice of some other layout.
+ bool isSliceOf(const xegpu::DistributeLayoutAttr &other) { return false; }
+
}];
let assemblyFormat = "`<` struct(params) `>`";
@@ -499,10 +509,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the SgLayout of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- SmallVector<int64_t> getSgLayoutAsInt() const {
+ SmallVector<int64_t> getEffectiveSgLayoutAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- auto layout = parent.getSgLayoutAsInt();
+ auto layout = parent.getEffectiveSgLayoutAsInt();
if (layout.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
return XeGPUDialect::slice(ArrayRef<int64_t>(layout), dims);
@@ -512,10 +522,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the SgData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- SmallVector<int64_t> getSgDataAsInt() const {
+ SmallVector<int64_t> getEffectiveSgDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- auto data = parent.getSgDataAsInt();
+ auto data = parent.getEffectiveSgDataAsInt();
if (data.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
return XeGPUDialect::slice(ArrayRef<int64_t>(data), dims);
@@ -525,10 +535,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the InstData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- SmallVector<int64_t> getInstDataAsInt() const {
+ SmallVector<int64_t> getEffectiveInstDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- auto inst = parent.getInstDataAsInt();
+ auto inst = parent.getEffectiveInstDataAsInt();
if (inst.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(inst), dims);
@@ -538,10 +548,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the LaneLayout of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- SmallVector<int64_t> getLaneLayoutAsInt() const {
+ SmallVector<int64_t> getEffectiveLaneLayoutAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- auto layout = parent.getLaneLayoutAsInt();
+ auto layout = parent.getEffectiveLaneLayoutAsInt();
if (layout.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(layout), dims);
@@ -551,10 +561,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the LaneData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- SmallVector<int64_t> getLaneDataAsInt() const {
+ SmallVector<int64_t> getEffectiveLaneDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- auto data = parent.getLaneDataAsInt();
+ auto data = parent.getEffectiveLaneDataAsInt();
if (data.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(data), dims);
@@ -594,6 +604,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
FailureOr<SmallVector<SmallVector<Value>>>
getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape);
+ /// Check if this is slice of some other layout.
+ bool isSliceOf(const xegpu::DistributeLayoutAttr &other);
+
}];
let assemblyFormat = "`<` qualified($parent) `,` `dims` `=` $dims `>`";
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index ddf6b4a..59dca9f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -27,6 +27,10 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
}];
let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
"vector::VectorDialect"];
+ let options = [Option<
+ "enableSGReductions", "enable-sg-reductions", "bool",
+ /*default=*/"true",
+ "Enable subgroup reductions using subgroup shuffles.">];
}
def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
diff --git a/mlir/include/mlir/IR/Remarks.h b/mlir/include/mlir/IR/Remarks.h
index 26d6547..20e84ec 100644
--- a/mlir/include/mlir/IR/Remarks.h
+++ b/mlir/include/mlir/IR/Remarks.h
@@ -29,7 +29,7 @@ namespace mlir::remark {
/// Define an the set of categories to accept. By default none are, the provided
/// regex matches against the category names for each kind of remark.
struct RemarkCategories {
- std::optional<std::string> passed, missed, analysis, failed;
+ std::optional<std::string> all, passed, missed, analysis, failed;
};
/// Categories describe the outcome of an transformation, not the mechanics of
diff --git a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h
index 72db061..82ab427 100644
--- a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h
@@ -19,7 +19,7 @@
namespace mlir {
namespace detail {
// TODO: Single region single block interface on interfaces ?
-LogicalResult verifyParallelCombiningOpInterface(Operation *op);
+LogicalResult verifyInParallelOpInterface(Operation *op);
} // namespace detail
} // namespace mlir
diff --git a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td
index 424b4cf..ace26f7 100644
--- a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td
+++ b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Defines the interface for ops that perform parallel combining operations.
+// Defines the interface for ops that perform in parallel combining
+// operations.
//
//===----------------------------------------------------------------------===//
@@ -15,9 +16,9 @@
include "mlir/IR/OpBase.td"
-def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> {
+def InParallelOpInterface : OpInterface<"InParallelOpInterface"> {
let description = [{
- A parallel combining op is an op with a region.
+ An in-parallel op is an op with a region.
This is useful as a terminator to parallel operations that iterate over
some set and return tensors while avoiding tight coupling between the
@@ -52,8 +53,60 @@ def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> {
];
// TODO: Single region single block interface on interfaces ?
let verify = [{
- return verifyParallelCombiningOpInterface($_op);
+ return verifyInParallelOpInterface($_op);
+ }];
+}
+
+def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> {
+ let description = [{
+ A parallel combining op is an operation that models parallel contributions
+ to result tensors within the context of a parent iterating operation.
+
+ This interface is designed for operations that need to coordinate parallel
+ insertions or contributions to tensors that are being constructed across
+ multiple parallel iterations. The destination refers to a tensor value that
+ is assembled by aggregating results from parallel computations; each
+ parallel iteration may contribute a slice, element, or region to the final
+ result. No in-place mutation of tensors is implied.
+
+ One significant use case for this interface is `tensor.parallel_insert_slice`
+ which allows parallel insertion of slices that are aggregated into a
+ destination tensor. With this interface, other operations that express
+ similar parallel contributions can also be defined.
+
+ This op works within an op implementing the `InParallelOpInterface` that
+ specifies how the parallel results are combined.
+
+ Key semantics:
+ - The operation identifies destination tensors to which iterations
+ contribute through the `getUpdatedDestinations` method
+ - Each parallel iteration may produce elements or regions that are
+ incorporated into the destination tensor
+ - The parent iterating operation manages the coordination and ensures
+ proper synchronization of these contributions
+
+ Note: This interface does not verify itself; it is up to the implementing operation
+ to verify the correctness of the op.
}];
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Returns the list of destination values this op contributes to.
+ }],
+ /*retTy=*/"::mlir::MutableOperandRange",
+ /*methodName=*/"getUpdatedDestinations",
+ /*args=*/(ins)
+ >,
+ InterfaceMethod<
+ /*desc=*/[{
+ Returns the iterating parent for this op.
+ }],
+ /*retTy=*/"::mlir::Operation*",
+ /*methodName=*/"getIteratingParent",
+ /*args=*/(ins)
+ >,
+ ];
}
#endif // MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 9423122..c3ac9d9 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -38,6 +38,12 @@ enum class VerbosityLevel {
ErrorsWarningsAndRemarks
};
+using RemarkFormat = enum {
+ REMARK_FORMAT_STDOUT,
+ REMARK_FORMAT_YAML,
+ REMARK_FORMAT_BITSTREAM,
+};
+
/// Configuration options for the mlir-opt tool.
/// This is intended to help building tools like mlir-opt by collecting the
/// supported options.
@@ -221,15 +227,53 @@ public:
}
bool shouldVerifyRoundtrip() const { return verifyRoundtripFlag; }
+ /// Checks if any remark filters are set.
+ bool shouldEmitRemarks() const {
+ // Remarks are emitted only when at least one filter is specified.
+ const bool hasFilters =
+ !getRemarksAllFilter().empty() || !getRemarksPassedFilter().empty() ||
+ !getRemarksFailedFilter().empty() ||
+ !getRemarksMissedFilter().empty() || !getRemarksAnalyseFilter().empty();
+ return hasFilters;
+ }
+
/// Reproducer file generation (no crash required).
StringRef getReproducerFilename() const { return generateReproducerFileFlag; }
+ /// Get the remark output format.
+ RemarkFormat getRemarkFormat() const { return remarkFormatFlag; }
+ /// Get the remark 'all' filter.
+ std::string getRemarksAllFilter() const { return remarksAllFilterFlag; }
+ /// Get the remark output file.
+ std::string getRemarksOutputFile() const { return remarksOutputFileFlag; }
+ /// Get the remark passed filter.
+ std::string getRemarksPassedFilter() const { return remarksPassedFilterFlag; }
+ /// Get the remark failed filter.
+ std::string getRemarksFailedFilter() const { return remarksFailedFilterFlag; }
+ /// Get the remark missed filter.
+ std::string getRemarksMissedFilter() const { return remarksMissedFilterFlag; }
+ /// Get the remark analyse filter.
+ std::string getRemarksAnalyseFilter() const {
+ return remarksAnalyseFilterFlag;
+ }
+
protected:
/// Allow operation with no registered dialects.
/// This option is for convenience during testing only and discouraged in
/// general.
bool allowUnregisteredDialectsFlag = false;
+ /// Remark format
+ RemarkFormat remarkFormatFlag;
+ /// Remark file to output to
+ std::string remarksOutputFileFlag = "";
+ /// Remark filters
+ std::string remarksAllFilterFlag = "";
+ std::string remarksPassedFilterFlag = "";
+ std::string remarksFailedFilterFlag = "";
+ std::string remarksMissedFilterFlag = "";
+ std::string remarksAnalyseFilterFlag = "";
+
/// Configuration for the debugging hooks.
tracing::DebugConfig debugConfig;
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index a096f82..f8caae3 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -1429,6 +1429,9 @@ struct ConversionConfig {
/// In the above example, %0 can be used instead of %3 and all cast ops are
/// folded away.
void reconcileUnrealizedCasts(
+ const DenseSet<UnrealizedConversionCastOp> &castOps,
+ SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps = nullptr);
+void reconcileUnrealizedCasts(
ArrayRef<UnrealizedConversionCastOp> castOps,
SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps = nullptr);
diff --git a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp
index e79f6a8..70b56ca 100644
--- a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp
+++ b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp
@@ -26,6 +26,7 @@
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Support/DebugStringHelper.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
@@ -76,9 +77,17 @@ void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const {
else
dialect = value.getParentBlock()->getParentOp()->getDialect();
- Type type = getElementTypeOrSelf(value);
- solver->propagateIfChanged(
- cv, cv->join(ConstantValue(IntegerAttr::get(type, *constant), dialect)));
+ Attribute cstAttr;
+ if (isa<IntegerType, IndexType>(value.getType())) {
+ cstAttr = IntegerAttr::get(value.getType(), *constant);
+ } else if (auto shapedTy = dyn_cast<ShapedType>(value.getType())) {
+ cstAttr = SplatElementsAttr::get(shapedTy, *constant);
+ } else {
+ llvm::report_fatal_error(
+ Twine("FIXME: Don't know how to create a constant for this type: ") +
+ mlir::debugString(value.getType()));
+ }
+ solver->propagateIfChanged(cv, cv->join(ConstantValue(cstAttr, dialect)));
}
LogicalResult IntegerRangeAnalysis::visitOperation(
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 203790e..0078eed 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -1915,7 +1915,16 @@ struct AMDGPUPermlaneLowering : public ConvertOpToLLVMPattern<PermlaneSwapOp> {
else
llvm_unreachable("unsupported row length");
- Value vdstNew = LLVM::ExtractValueOp::create(rewriter, loc, res, {0});
+ const Value vdst0 = LLVM::ExtractValueOp::create(rewriter, loc, res, {0});
+ const Value vdst1 = LLVM::ExtractValueOp::create(rewriter, loc, res, {1});
+
+ const Value isEqual =
+ rewriter.create<LLVM::ICmpOp>(loc, LLVM::ICmpPredicate::eq, vdst0, v);
+
+ // Per `permlane(16|32)` semantics: if the first extracted element equals
+ // 'v', the result is the second element; otherwise it is the first.
+ Value vdstNew =
+ rewriter.create<LLVM::SelectOp>(loc, isEqual, vdst1, vdst0);
permuted.emplace_back(vdstNew);
}
diff --git a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
index 8a2e3b63..033e9ae 100644
--- a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
+++ b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
@@ -822,7 +822,7 @@ struct OuterProductWideningOpConversion
}
};
-/// Lower `arm_sme.streaming_vl` to SME CNTS intrinsics.
+/// Lower `arm_sme.streaming_vl` to SME CNTSD intrinsic.
///
/// Example:
///
@@ -830,8 +830,10 @@ struct OuterProductWideningOpConversion
///
/// is converted to:
///
-/// %cnt = "arm_sme.intr.cntsh"() : () -> i64
-/// %0 = arith.index_cast %cnt : i64 to index
+/// %cnt = "arm_sme.intr.cntsd"() : () -> i64
+/// %scale = arith.constant 4 : index
+/// %cntIndex = arith.index_cast %cnt : i64 to index
+/// %0 = arith.muli %cntIndex, %scale : index
///
struct StreamingVLOpConversion
: public ConvertArmSMEOpToLLVMPattern<arm_sme::StreamingVLOp,
@@ -844,21 +846,13 @@ struct StreamingVLOpConversion
ConversionPatternRewriter &rewriter) const override {
auto loc = streamingVlOp.getLoc();
auto i64Type = rewriter.getI64Type();
- auto *intrOp = [&]() -> Operation * {
- switch (streamingVlOp.getTypeSize()) {
- case arm_sme::TypeSize::Byte:
- return arm_sme::aarch64_sme_cntsb::create(rewriter, loc, i64Type);
- case arm_sme::TypeSize::Half:
- return arm_sme::aarch64_sme_cntsh::create(rewriter, loc, i64Type);
- case arm_sme::TypeSize::Word:
- return arm_sme::aarch64_sme_cntsw::create(rewriter, loc, i64Type);
- case arm_sme::TypeSize::Double:
- return arm_sme::aarch64_sme_cntsd::create(rewriter, loc, i64Type);
- }
- llvm_unreachable("unknown type size in StreamingVLOpConversion");
- }();
- rewriter.replaceOpWithNewOp<arith::IndexCastOp>(
- streamingVlOp, rewriter.getIndexType(), intrOp->getResult(0));
+ auto cntsd = arm_sme::aarch64_sme_cntsd::create(rewriter, loc, i64Type);
+ auto cntsdIdx = arith::IndexCastOp::create(rewriter, loc,
+ rewriter.getIndexType(), cntsd);
+ auto scale = arith::ConstantIndexOp::create(
+ rewriter, loc,
+ 8 / arm_sme::getSizeInBytes(streamingVlOp.getTypeSize()));
+ rewriter.replaceOpWithNewOp<arith::MulIOp>(streamingVlOp, cntsdIdx, scale);
return success();
}
};
@@ -964,9 +958,7 @@ void mlir::configureArmSMEToLLVMConversionLegality(ConversionTarget &target) {
arm_sme::aarch64_sme_smops_za32, arm_sme::aarch64_sme_umopa_za32,
arm_sme::aarch64_sme_umops_za32, arm_sme::aarch64_sme_sumopa_wide,
arm_sme::aarch64_sme_sumops_wide, arm_sme::aarch64_sme_usmopa_wide,
- arm_sme::aarch64_sme_usmops_wide, arm_sme::aarch64_sme_cntsb,
- arm_sme::aarch64_sme_cntsh, arm_sme::aarch64_sme_cntsw,
- arm_sme::aarch64_sme_cntsd>();
+ arm_sme::aarch64_sme_usmops_wide, arm_sme::aarch64_sme_cntsd>();
target.addLegalDialect<arith::ArithDialect,
/* The following are used to lower tile spills/fills */
vector::VectorDialect, scf::SCFDialect,
diff --git a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
index 777ff0e..2017905 100644
--- a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
@@ -8,6 +8,7 @@
#include <utility>
+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
#include "mlir/Analysis/DataFlowFramework.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
@@ -485,6 +486,7 @@ struct IntRangeOptimizationsPass final
MLIRContext *ctx = op->getContext();
DataFlowSolver solver;
solver.load<DeadCodeAnalysis>();
+ solver.load<SparseConstantPropagation>();
solver.load<IntegerRangeAnalysis>();
if (failed(solver.initializeAndRun(op)))
return signalPassFailure();
diff --git a/mlir/lib/Dialect/ArmSME/IR/Utils.cpp b/mlir/lib/Dialect/ArmSME/IR/Utils.cpp
index e5e1312..e64ae42 100644
--- a/mlir/lib/Dialect/ArmSME/IR/Utils.cpp
+++ b/mlir/lib/Dialect/ArmSME/IR/Utils.cpp
@@ -14,6 +14,21 @@
namespace mlir::arm_sme {
+unsigned getSizeInBytes(TypeSize type) {
+ switch (type) {
+ case arm_sme::TypeSize::Byte:
+ return 1;
+ case arm_sme::TypeSize::Half:
+ return 2;
+ case arm_sme::TypeSize::Word:
+ return 4;
+ case arm_sme::TypeSize::Double:
+ return 8;
+ }
+ llvm_unreachable("unknown type size");
+ return 0;
+}
+
unsigned getSMETileSliceMinNumElts(Type type) {
assert(isValidSMETileElementType(type) && "invalid tile type!");
return MinStreamingVectorLengthInBits / type.getIntOrFloatBitWidth();
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index f3db8f7c..715eebb 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -36,6 +36,7 @@
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/ParallelCombiningOpInterface.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -4147,12 +4148,11 @@ DiagnosedSilenceableFailure doit(RewriterBase &rewriter, OpTy target,
return DiagnosedSilenceableFailure::success();
}
- // If we are inside an InParallel region, temporarily set the insertion point
- // outside: only tensor.parallel_insert_slice ops are allowed in there.
- if constexpr (std::is_same_v<OpTy, tensor::ParallelInsertSliceOp>) {
- rewriter.setInsertionPoint(
- target->template getParentOfType<scf::InParallelOp>());
- }
+ // If we are inside a `ParallelCombiningOp` region, temporarily set the
+ // insertion point outside: only ops implementing ParallelCombiningOpInterface
+ // are allowed in there.
+ if (isa<mlir::ParallelCombiningOpInterface>(target.getOperation()))
+ rewriter.setInsertionPoint(target->getParentOp());
Value extracted = tensor::ExtractSliceOp::create(
rewriter, target.getLoc(), target.getDest(), target.getMixedOffsets(),
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index ed2efd6..6c17c3c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -1245,21 +1245,21 @@ struct SliceDimInfo {
OpFoldResult outputSize;
};
-/// Return the first input extract slice operand, if present, for the current
+/// Return all extract slice operands, if present, for the current
/// generic op.
-static FailureOr<OpOperand *> getSliceOperand(GenericOp genericOp) {
- OpOperand *sliceOperand = nullptr;
+static FailureOr<SmallVector<OpOperand *>>
+getSliceOperands(GenericOp genericOp) {
+ SmallVector<OpOperand *> sliceOperands;
for (auto operand : genericOp.getDpsInputOperands()) {
auto extractOp = operand->get().getDefiningOp<tensor::ExtractSliceOp>();
if (!extractOp)
continue;
- sliceOperand = operand;
- break;
+ sliceOperands.push_back(operand);
}
- if (!sliceOperand) {
+ if (sliceOperands.empty()) {
return failure();
}
- return sliceOperand;
+ return sliceOperands;
}
// Return a map of dims that have partial slices on them so that other operands
@@ -1336,14 +1336,24 @@ pushDownExtractSliceOpThroughGenericOp(RewriterBase &rewriter,
genericOp,
"propagation through generic with gather semantics is unsupported.");
// Collect the sliced operand, if present.
- auto maybeSliceOperand = getSliceOperand(genericOp);
- if (failed(maybeSliceOperand))
+ auto maybeSliceOperands = getSliceOperands(genericOp);
+ if (failed(maybeSliceOperands))
return failure();
- OpOperand *sliceOperand = *maybeSliceOperand;
- unsigned OperandIndex = sliceOperand->getOperandNumber();
-
- if (!controlFn(sliceOperand))
+ SmallVector<OpOperand *> sliceOperands = *maybeSliceOperands;
+ OpOperand *sliceOperand;
+
+ bool foundValidOperand = false;
+ for (auto currSliceOperand : sliceOperands) {
+ if (controlFn(currSliceOperand)) {
+ sliceOperand = currSliceOperand;
+ foundValidOperand = true;
+ break;
+ }
+ }
+ if (!foundValidOperand) {
return failure();
+ }
+ unsigned OperandIndex = sliceOperand->getOperandNumber();
tensor::ExtractSliceOp producerSliceOp =
sliceOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index 84f9777..45b14fc 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -21,6 +21,7 @@
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/ParallelCombiningOpInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/MapVector.h"
@@ -681,7 +682,9 @@ void mlir::scf::promote(RewriterBase &rewriter, scf::ForallOp forallOp) {
results.reserve(forallOp.getResults().size());
for (auto &yieldingOp : terminator.getYieldingOps()) {
auto parallelInsertSliceOp =
- cast<tensor::ParallelInsertSliceOp>(yieldingOp);
+ dyn_cast<tensor::ParallelInsertSliceOp>(yieldingOp);
+ if (!parallelInsertSliceOp)
+ continue;
Value dst = parallelInsertSliceOp.getDest();
Value src = parallelInsertSliceOp.getSource();
@@ -1439,12 +1442,9 @@ InParallelOp ForallOp::getTerminator() {
SmallVector<Operation *> ForallOp::getCombiningOps(BlockArgument bbArg) {
SmallVector<Operation *> storeOps;
- InParallelOp inParallelOp = getTerminator();
- for (Operation &yieldOp : inParallelOp.getYieldingOps()) {
- if (auto parallelInsertSliceOp =
- dyn_cast<tensor::ParallelInsertSliceOp>(yieldOp);
- parallelInsertSliceOp && parallelInsertSliceOp.getDest() == bbArg) {
- storeOps.push_back(parallelInsertSliceOp);
+ for (Operation *user : bbArg.getUsers()) {
+ if (auto parallelOp = dyn_cast<ParallelCombiningOpInterface>(user)) {
+ storeOps.push_back(parallelOp);
}
}
return storeOps;
@@ -1911,8 +1911,10 @@ struct FoldTensorCastOfOutputIntoForallOp
auto terminator = newForallOp.getTerminator();
for (auto [yieldingOp, outputBlockArg] : llvm::zip(
terminator.getYieldingOps(), newForallOp.getRegionIterArgs())) {
- auto insertSliceOp = cast<tensor::ParallelInsertSliceOp>(yieldingOp);
- insertSliceOp.getDestMutable().assign(outputBlockArg);
+ if (auto parallelCombiningOp =
+ dyn_cast<ParallelCombiningOpInterface>(yieldingOp)) {
+ parallelCombiningOp.getUpdatedDestinations().assign(outputBlockArg);
+ }
}
// Cast results back to the original types.
@@ -1971,19 +1973,22 @@ LogicalResult InParallelOp::verify() {
if (!forallOp)
return this->emitOpError("expected forall op parent");
- // TODO: InParallelOpInterface.
for (Operation &op : getRegion().front().getOperations()) {
- if (!isa<tensor::ParallelInsertSliceOp>(op)) {
- return this->emitOpError("expected only ")
- << tensor::ParallelInsertSliceOp::getOperationName() << " ops";
+ auto parallelCombiningOp = dyn_cast<ParallelCombiningOpInterface>(&op);
+ if (!parallelCombiningOp) {
+ return this->emitOpError("expected only ParallelCombiningOpInterface")
+ << " ops";
}
// Verify that inserts are into out block arguments.
- Value dest = cast<tensor::ParallelInsertSliceOp>(op).getDest();
+ MutableOperandRange dests = parallelCombiningOp.getUpdatedDestinations();
ArrayRef<BlockArgument> regionOutArgs = forallOp.getRegionOutArgs();
- if (!llvm::is_contained(regionOutArgs, dest))
- return op.emitOpError("may only insert into an output block argument");
+ for (OpOperand &dest : dests) {
+ if (!llvm::is_contained(regionOutArgs, dest.get()))
+ return op.emitOpError("may only insert into an output block argument");
+ }
}
+
return success();
}
@@ -2018,12 +2023,17 @@ OpResult InParallelOp::getParentResult(int64_t idx) {
}
SmallVector<BlockArgument> InParallelOp::getDests() {
- return llvm::to_vector<4>(
- llvm::map_range(getYieldingOps(), [](Operation &op) {
- // Add new ops here as needed.
- auto insertSliceOp = cast<tensor::ParallelInsertSliceOp>(&op);
- return llvm::cast<BlockArgument>(insertSliceOp.getDest());
- }));
+ SmallVector<BlockArgument> updatedDests;
+ for (Operation &yieldingOp : getYieldingOps()) {
+ auto parallelCombiningOp =
+ dyn_cast<ParallelCombiningOpInterface>(&yieldingOp);
+ if (!parallelCombiningOp)
+ continue;
+ for (OpOperand &updatedOperand :
+ parallelCombiningOp.getUpdatedDestinations())
+ updatedDests.push_back(cast<BlockArgument>(updatedOperand.get()));
+ }
+ return updatedDests;
}
llvm::iterator_range<Block::iterator> InParallelOp::getYieldingOps() {
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
index a446124..63216e7 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -16,7 +16,7 @@ using namespace mlir::bufferization;
namespace {
/// The `scf.forall.in_parallel` terminator is special in a few ways:
/// * It does not implement the BranchOpInterface or
-/// RegionBranchTerminatorOpInterface, but the ParallelCombiningOpInterface
+/// RegionBranchTerminatorOpInterface, but the InParallelOpInterface
/// which is not supported by BufferDeallocation.
/// * It has a graph-like region which only allows one specific tensor op
/// * After bufferization the nested region is always empty
@@ -40,9 +40,9 @@ namespace {
/// <implicit in_parallel terminator here>
/// }
/// ```
-struct InParallelOpInterface
- : public BufferDeallocationOpInterface::ExternalModel<InParallelOpInterface,
- scf::InParallelOp> {
+struct InParallelDeallocOpInterface
+ : public BufferDeallocationOpInterface::ExternalModel<
+ InParallelDeallocOpInterface, scf::InParallelOp> {
FailureOr<Operation *> process(Operation *op, DeallocationState &state,
const DeallocationOptions &options) const {
auto inParallelOp = cast<scf::InParallelOp>(op);
@@ -75,7 +75,7 @@ struct ReduceReturnOpInterface
void mlir::scf::registerBufferDeallocationOpInterfaceExternalModels(
DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, SCFDialect *dialect) {
- InParallelOp::attachInterface<InParallelOpInterface>(*ctx);
+ InParallelOp::attachInterface<InParallelDeallocOpInterface>(*ctx);
ReduceReturnOp::attachInterface<ReduceReturnOpInterface>(*ctx);
});
}
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
index fcf1526..44c86bc 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
@@ -1066,7 +1066,12 @@ LogicalResult SPIRVDialect::verifyRegionArgAttribute(Operation *op,
}
LogicalResult SPIRVDialect::verifyRegionResultAttribute(
- Operation *op, unsigned /*regionIndex*/, unsigned /*resultIndex*/,
+ Operation *op, unsigned /*regionIndex*/, unsigned resultIndex,
NamedAttribute attribute) {
- return op->emitError("cannot attach SPIR-V attributes to region result");
+ if (auto graphOp = dyn_cast<spirv::GraphARMOp>(op))
+ return verifyRegionAttribute(
+ op->getLoc(), graphOp.getResultTypes()[resultIndex], attribute);
+ return op->emitError(
+ "cannot attach SPIR-V attributes to region result which is "
+ "not part of a spirv::GraphARMOp type");
}
diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
index 3911ec0..5607a3c 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
@@ -22,6 +22,7 @@
#include "mlir/Dialect/SPIRV/Utils/LayoutUtils.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/Support/FormatVariadic.h"
namespace mlir {
namespace spirv {
@@ -85,10 +86,36 @@ createGlobalVarForEntryPointArgument(OpBuilder &builder, spirv::FuncOp funcOp,
abiInfo.getBinding());
}
+/// Creates a global variable for an argument or result based on the ABI info.
+static spirv::GlobalVariableOp
+createGlobalVarForGraphEntryPoint(OpBuilder &builder, spirv::GraphARMOp graphOp,
+ unsigned index, bool isArg,
+ spirv::InterfaceVarABIAttr abiInfo) {
+ auto spirvModule = graphOp->getParentOfType<spirv::ModuleOp>();
+ if (!spirvModule)
+ return nullptr;
+
+ OpBuilder::InsertionGuard moduleInsertionGuard(builder);
+ builder.setInsertionPoint(graphOp.getOperation());
+ std::string varName = llvm::formatv("{}_{}_{}", graphOp.getName(),
+ isArg ? "arg" : "res", index);
+
+ Type varType = isArg ? graphOp.getFunctionType().getInput(index)
+ : graphOp.getFunctionType().getResult(index);
+
+ auto pointerType = spirv::PointerType::get(
+ varType,
+ abiInfo.getStorageClass().value_or(spirv::StorageClass::UniformConstant));
+
+ return spirv::GlobalVariableOp::create(builder, graphOp.getLoc(), pointerType,
+ varName, abiInfo.getDescriptorSet(),
+ abiInfo.getBinding());
+}
+
/// Gets the global variables that need to be specified as interface variable
/// with an spirv.EntryPointOp. Traverses the body of a entry function to do so.
static LogicalResult
-getInterfaceVariables(spirv::FuncOp funcOp,
+getInterfaceVariables(mlir::FunctionOpInterface funcOp,
SmallVectorImpl<Attribute> &interfaceVars) {
auto module = funcOp->getParentOfType<spirv::ModuleOp>();
if (!module) {
@@ -224,6 +251,21 @@ public:
ConversionPatternRewriter &rewriter) const override;
};
+/// A pattern to convert graph signature according to interface variable ABI
+/// attributes.
+///
+/// Specifically, this pattern creates global variables according to interface
+/// variable ABI attributes attached to graph arguments and results.
+class ProcessGraphInterfaceVarABI final
+ : public OpConversionPattern<spirv::GraphARMOp> {
+public:
+ using OpConversionPattern::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(spirv::GraphARMOp graphOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override;
+};
+
/// Pass to implement the ABI information specified as attributes.
class LowerABIAttributesPass final
: public spirv::impl::SPIRVLowerABIAttributesPassBase<
@@ -297,6 +339,63 @@ LogicalResult ProcessInterfaceVarABI::matchAndRewrite(
return success();
}
+LogicalResult ProcessGraphInterfaceVarABI::matchAndRewrite(
+ spirv::GraphARMOp graphOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const {
+ // Non-entry point graphs are not handled.
+ if (!graphOp.getEntryPoint().value_or(false))
+ return failure();
+
+ TypeConverter::SignatureConversion signatureConverter(
+ graphOp.getFunctionType().getNumInputs());
+
+ StringRef attrName = spirv::getInterfaceVarABIAttrName();
+ SmallVector<Attribute, 4> interfaceVars;
+
+ // Convert arguments.
+ unsigned numInputs = graphOp.getFunctionType().getNumInputs();
+ unsigned numResults = graphOp.getFunctionType().getNumResults();
+ for (unsigned index = 0; index < numInputs; ++index) {
+ auto abiInfo =
+ graphOp.getArgAttrOfType<spirv::InterfaceVarABIAttr>(index, attrName);
+ if (!abiInfo)
+ return failure();
+ spirv::GlobalVariableOp var = createGlobalVarForGraphEntryPoint(
+ rewriter, graphOp, index, true, abiInfo);
+ if (!var)
+ return failure();
+ interfaceVars.push_back(
+ SymbolRefAttr::get(rewriter.getContext(), var.getSymName()));
+ }
+
+ for (unsigned index = 0; index < numResults; ++index) {
+ auto abiInfo = graphOp.getResultAttrOfType<spirv::InterfaceVarABIAttr>(
+ index, attrName);
+ if (!abiInfo)
+ return failure();
+ spirv::GlobalVariableOp var = createGlobalVarForGraphEntryPoint(
+ rewriter, graphOp, index, false, abiInfo);
+ if (!var)
+ return failure();
+ interfaceVars.push_back(
+ SymbolRefAttr::get(rewriter.getContext(), var.getSymName()));
+ }
+
+ // Update graph signature.
+ rewriter.modifyOpInPlace(graphOp, [&] {
+ for (unsigned index = 0; index < numInputs; ++index) {
+ graphOp.removeArgAttr(index, attrName);
+ }
+ for (unsigned index = 0; index < numResults; ++index) {
+ graphOp.removeResultAttr(index, rewriter.getStringAttr(attrName));
+ }
+ });
+
+ spirv::GraphEntryPointARMOp::create(rewriter, graphOp.getLoc(), graphOp,
+ interfaceVars);
+ return success();
+}
+
void LowerABIAttributesPass::runOnOperation() {
// Uses the signature conversion methodology of the dialect conversion
// framework to implement the conversion.
@@ -322,7 +421,8 @@ void LowerABIAttributesPass::runOnOperation() {
});
RewritePatternSet patterns(context);
- patterns.add<ProcessInterfaceVarABI>(typeConverter, context);
+ patterns.add<ProcessInterfaceVarABI, ProcessGraphInterfaceVarABI>(
+ typeConverter, context);
ConversionTarget target(*context);
// "Legal" function ops should have no interface variable ABI attributes.
@@ -333,6 +433,17 @@ void LowerABIAttributesPass::runOnOperation() {
return false;
return true;
});
+ target.addDynamicallyLegalOp<spirv::GraphARMOp>([&](spirv::GraphARMOp op) {
+ StringRef attrName = spirv::getInterfaceVarABIAttrName();
+ for (unsigned i = 0, e = op.getNumArguments(); i < e; ++i)
+ if (op.getArgAttr(i, attrName))
+ return false;
+ for (unsigned i = 0, e = op.getNumResults(); i < e; ++i)
+ if (op.getResultAttr(i, attrName))
+ return false;
+ return true;
+ });
+
// All other SPIR-V ops are legal.
target.markUnknownOpDynamicallyLegal([](Operation *op) {
return op->getDialect()->getNamespace() ==
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 68584ec..fa97b49 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -2976,9 +2976,9 @@ public:
if (sourceType != insertSliceOp.getSourceType()) {
OpBuilder::InsertionGuard g(rewriter);
// The only difference between InsertSliceOp and ParallelInsertSliceOp
- // is that the insertion point is just before the ParallelCombiningOp in
+ // is that the insertion point is just before the InParallelOp in
// the parallel case.
- if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
+ if (isa<InParallelOpInterface>(insertSliceOp->getParentOp()))
rewriter.setInsertionPoint(insertSliceOp->getParentOp());
toInsert = tensor::CastOp::create(rewriter, insertSliceOp.getLoc(),
sourceType, toInsert);
@@ -3153,9 +3153,9 @@ struct InsertSliceOpSourceCastInserter final
// Insert the cast.
OpBuilder::InsertionGuard g(rewriter);
// The only difference between InsertSliceOp and ParallelInsertSliceOp is
- // that the insertion point is just before the ParallelCombiningOp in the
+ // that the insertion point is just before the InParallelOp in the
// parallel case.
- if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
+ if (isa<ParallelCombiningOpInterface>(insertSliceOp->getParentOp()))
rewriter.setInsertionPoint(insertSliceOp->getParentOp());
Value cast = tensor::CastOp::create(rewriter, insertSliceOp.getLoc(),
newSrcType, insertSliceOp.getSource());
@@ -3846,8 +3846,7 @@ OpFoldResult PadOp::fold(FoldAdaptor) {
//===----------------------------------------------------------------------===//
OpResult ParallelInsertSliceOp::getTiedOpResult() {
- ParallelCombiningOpInterface parallelCombiningParent =
- getParallelCombiningParent();
+ InParallelOpInterface parallelCombiningParent = getParallelCombiningParent();
for (const auto &it :
llvm::enumerate(parallelCombiningParent.getYieldingOps())) {
Operation &nextOp = it.value();
@@ -3901,8 +3900,8 @@ void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
}
LogicalResult ParallelInsertSliceOp::verify() {
- if (!isa<ParallelCombiningOpInterface>(getOperation()->getParentOp()))
- return this->emitError("expected ParallelCombiningOpInterface parent, got:")
+ if (!isa<InParallelOpInterface>(getOperation()->getParentOp()))
+ return this->emitError("expected InParallelOpInterface parent, got:")
<< *(getOperation()->getParentOp());
// Verify result type against inferred type.
@@ -3935,6 +3934,19 @@ llvm::SmallBitVector ParallelInsertSliceOp::getDroppedDims() {
return ::getDroppedDims(getSourceType().getShape(), getMixedSizes());
}
+// ParallelCombiningOpInterface implementation.
+MutableOperandRange ParallelInsertSliceOp::getUpdatedDestinations() {
+ return getDestMutable();
+}
+
+Operation *ParallelInsertSliceOp::getIteratingParent() {
+ // Return the parent InParallelOpInterface's parent.
+ if (auto combiningOp =
+ dyn_cast<InParallelOpInterface>(getOperation()->getParentOp()))
+ return combiningOp->getParentOp();
+ return nullptr;
+}
+
//===----------------------------------------------------------------------===//
// ScatterOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index c3356c1..bce964e 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -970,10 +970,10 @@ struct ParallelInsertSliceOpInterface
BufferizationState &state) const {
OpBuilder::InsertionGuard g(rewriter);
auto parallelInsertSliceOp = cast<ParallelInsertSliceOp>(op);
- ParallelCombiningOpInterface parallelCombiningParent =
+ InParallelOpInterface parallelCombiningParent =
parallelInsertSliceOp.getParallelCombiningParent();
- // Bufferize the op outside of the parallel combining terminator.
+ // Bufferize the op outside of the in parallel terminator.
rewriter.setInsertionPoint(parallelCombiningParent);
// Get source and destination buffers.
diff --git a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp
index d76c02a..b32faf4 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp
@@ -215,12 +215,11 @@ struct InsertSliceOfInsertSliceFolder : public OpRewritePattern<OpTy> {
sourceInsertSliceOp.getMixedSizes(),
droppedDims, resolvedSizes);
- // If we are inside an InParallel region, temporarily set the insertion
- // point outside: only tensor.parallel_insert_slice ops are allowed in
- // there.
- if (std::is_same_v<OpTy, tensor::ParallelInsertSliceOp>) {
- rewriter.setInsertionPoint(
- insertSliceOp->template getParentOfType<scf::InParallelOp>());
+ // If we are inside a ParallelCombining region, temporarily set the
+ // insertion point outside: only ops of ParallelCombiningOpInterface are
+ // allowed in there.
+ if (isa<mlir::ParallelCombiningOpInterface>(insertSliceOp.getOperation())) {
+ rewriter.setInsertionPoint(insertSliceOp->getParentOp());
}
// Resolve offsets according to source offsets and strides.
diff --git a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp
index dfce835..7ec61c7 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp
@@ -319,7 +319,7 @@ struct BubbleUpExpandThroughParallelCollapse
/// Note - this pattern could be extended to be a swap pattern between
/// `tensor.expand_shape` and `tensor.extract_slice`, but is currently
/// implemented only as a bubble up pattern for `tensor.extract_slice`.
-struct BubbleUpExpandShapeThroughExtractSlice
+struct BubbleUpExtractSliceThroughExpandShape
: public OpRewritePattern<tensor::ExtractSliceOp> {
using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
@@ -427,7 +427,7 @@ struct BubbleUpExpandShapeThroughExtractSlice
/// to tensor<15xf32>
/// ```
/// But this is not the intended purpose of the transformation.
-struct BubbleUpCollapseShapeThroughExtractSlice
+struct BubbleUpExtractSliceThroughCollapseShape
: public OpRewritePattern<tensor::ExtractSliceOp> {
using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
@@ -735,6 +735,6 @@ void mlir::tensor::populateBubbleUpExpandShapePatterns(
void mlir::tensor::populateBubbleUpExtractSliceOpPatterns(
RewritePatternSet &patterns) {
- patterns.add<BubbleUpExpandShapeThroughExtractSlice,
- BubbleUpCollapseShapeThroughExtractSlice>(patterns.getContext());
+ patterns.add<BubbleUpExtractSliceThroughExpandShape,
+ BubbleUpExtractSliceThroughCollapseShape>(patterns.getContext());
}
diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
index aba6178..132ed81 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
@@ -1151,7 +1151,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter,
std::optional<DiagnosedSilenceableFailure> maybeFailure;
for (Operation *root : state.getPayloadOps(getRoot())) {
WalkResult walkResult = root->walk([&](Operation *op) {
- LDBG(1, DEBUG_TYPE_MATCHER)
+ LDBG(DEBUG_TYPE_MATCHER, 1)
<< "matching "
<< OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions())
<< " @" << op;
@@ -1166,7 +1166,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter,
if (diag.isDefiniteFailure())
return WalkResult::interrupt();
if (diag.isSilenceableFailure()) {
- LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName()
+ LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName()
<< " failed: " << diag.getMessage();
return WalkResult::advance();
}
@@ -1298,7 +1298,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter,
if (!getRestrictRoot() && op == root)
return WalkResult::advance();
- LDBG(1, DEBUG_TYPE_MATCHER)
+ LDBG(DEBUG_TYPE_MATCHER, 1)
<< "matching "
<< OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions())
<< " @" << op;
@@ -1314,7 +1314,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter,
if (diag.isDefiniteFailure())
return WalkResult::interrupt();
if (diag.isSilenceableFailure()) {
- LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName()
+ LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName()
<< " failed: " << diag.getMessage();
continue;
}
@@ -2165,10 +2165,10 @@ DiagnosedSilenceableFailure transform::MatchOperationEmptyOp::matchOperation(
::std::optional<::mlir::Operation *> maybeCurrent,
transform::TransformResults &results, transform::TransformState &state) {
if (!maybeCurrent.has_value()) {
- LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp success";
+ LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp success";
return DiagnosedSilenceableFailure::success();
}
- LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp failure";
+ LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp failure";
return emitSilenceableError() << "operation is not empty";
}
diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp
index 6bb390a..18f105e 100644
--- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp
+++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp
@@ -88,6 +88,11 @@ void transform::ApplyDropUnitDimWithShapeCastPatternsOp::populatePatterns(
vector::populateDropUnitDimWithShapeCastPatterns(patterns);
}
+void transform::ApplyDropInnerMostUnitDimsFromXferOpsPatternsOp::
+ populatePatterns(RewritePatternSet &patterns) {
+ vector::populateDropInnerMostUnitDimsXferOpPatterns(patterns);
+}
+
void transform::ApplyLowerBitCastPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
vector::populateVectorBitCastLoweringPatterns(patterns);
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 7f3be7f..94c5509 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -133,22 +133,23 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
};
// check the sgLayout and sgData
- auto maybeSgShape =
- tryDistribute(shape, attr.getSgLayoutAsInt(), attr.getSgDataAsInt());
+ auto maybeSgShape = tryDistribute(shape, attr.getEffectiveSgLayoutAsInt(),
+ attr.getEffectiveSgDataAsInt());
if (!maybeSgShape)
return false;
auto sgShape = maybeSgShape.value();
// check InstData, it neither have layout nor need round-robin
auto maybeInstShape =
- tryDistribute(sgShape, {}, attr.getInstDataAsInt(), false);
+ tryDistribute(sgShape, {}, attr.getEffectiveInstDataAsInt(), false);
if (!maybeInstShape)
return false;
auto instShape = maybeInstShape.value();
// check LaneLayout and LaneData
- auto maybeLaneShape = tryDistribute(instShape, attr.getLaneLayoutAsInt(),
- attr.getLaneDataAsInt(), false);
+ auto maybeLaneShape =
+ tryDistribute(instShape, attr.getEffectiveLaneLayoutAsInt(),
+ attr.getEffectiveLaneDataAsInt(), false);
return maybeLaneShape.has_value();
}
@@ -282,9 +283,10 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
if (!hasDefaultOrder())
return mlir::emitError(loc, "order attribute is currently not supported.");
- auto dims = llvm::map_to_vector(getSgLayoutAsInt(), [&](int64_t d) -> Value {
- return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
- });
+ auto dims =
+ llvm::map_to_vector(getEffectiveSgLayoutAsInt(), [&](int64_t d) -> Value {
+ return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
+ });
return affine::delinearizeIndex(builder, loc, linearId, dims);
}
@@ -298,8 +300,8 @@ LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
if (!isForWorkgroup())
return failure();
- SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
- SmallVector<int64_t> sgShape = getSgDataAsInt();
+ SmallVector<int64_t> sgLayout = getEffectiveSgLayoutAsInt();
+ SmallVector<int64_t> sgShape = getEffectiveSgDataAsInt();
if (sgShape.empty()) {
if (auto derivedShape = computeShapeRatio(shape, sgLayout))
sgShape = derivedShape.value();
@@ -385,8 +387,8 @@ SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
if (!isForWorkgroup())
return failure();
- SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
- SmallVector<int64_t> sgShape = getSgDataAsInt();
+ SmallVector<int64_t> sgLayout = getEffectiveSgLayoutAsInt();
+ SmallVector<int64_t> sgShape = getEffectiveSgDataAsInt();
if (sgShape.empty()) {
if (auto derivedShape = computeShapeRatio(shape, sgLayout))
sgShape = derivedShape.value();
@@ -409,6 +411,26 @@ SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
shape);
}
+bool SliceAttr::isSliceOf(const xegpu::DistributeLayoutAttr &other) {
+ auto flattenedThis = flatten();
+ // If other is a LayoutAttr, just compare directly with parent of
+ // flattenedThis.
+ if (auto otherLayout = dyn_cast<xegpu::LayoutAttr>(other))
+ return flattenedThis.getParent() == otherLayout;
+ // If other is a SliceAttr, flatten it first before comparing.
+ auto flattenedOther = dyn_cast<xegpu::SliceAttr>(other).flatten();
+ // Both must have common parent LayoutAttr.
+ if (flattenedThis.getParent() != flattenedOther.getParent())
+ return false;
+ // otherFlattened's sliced dims must be a subset of flattenedThis's sliced
+ // dims.
+ llvm::SmallDenseSet<int64_t> thisDims(
+ flattenedThis.getDims().asArrayRef().begin(),
+ flattenedThis.getDims().asArrayRef().end());
+ return llvm::all_of(flattenedOther.getDims().asArrayRef(),
+ [&](int64_t dim) { return thisDims.contains(dim); });
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_RangeAttr
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 5d5ff69..7efa4b9 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -85,16 +85,16 @@ struct ConvertLayoutOpPattern
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op,
PatternRewriter &rewriter) const override {
- xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr();
- xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr();
- if (input_layout.getInstDataAsInt().empty() ||
- target_layout.getInstDataAsInt().empty())
+ xegpu::DistributeLayoutAttr inputLayout = op.getInputLayoutAttr();
+ xegpu::DistributeLayoutAttr targetLayout = op.getTargetLayoutAttr();
+ if (inputLayout.getEffectiveInstDataAsInt().empty() ||
+ targetLayout.getEffectiveInstDataAsInt().empty())
return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp.");
- input_layout = input_layout.dropInstData();
- target_layout = target_layout.dropInstData();
+ inputLayout = inputLayout.dropInstData();
+ targetLayout = targetLayout.dropInstData();
auto newOp = rewriter.createOrFold<xegpu::ConvertLayoutOp>(
- op.getLoc(), op.getType(), op.getSource(), input_layout, target_layout);
+ op.getLoc(), op.getType(), op.getSource(), inputLayout, targetLayout);
rewriter.replaceOp(op, newOp);
return success();
}
@@ -145,8 +145,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(operandOrResult);
if (layout && layout.isForSubgroup()) {
- if (!layout.getInstDataAsInt().empty())
- return layout.getInstDataAsInt();
+ if (!layout.getEffectiveInstDataAsInt().empty())
+ return layout.getEffectiveInstDataAsInt();
if (auto type = dyn_cast<ShapedType>(value.getType()))
return llvm::to_vector(type.getShape());
@@ -226,7 +226,7 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
Type valTy = value.getType();
if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(valTy)) {
xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
- return layout && !layout.getInstDataAsInt().empty();
+ return layout && !layout.getEffectiveInstDataAsInt().empty();
}
auto shapedType = dyn_cast<ShapedType>(valTy);
return shapedType && !llvm::equal(tileShape, shapedType.getShape());
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index b336692..21c1583 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -58,6 +58,12 @@ namespace {
// SIMT Distribution Patterns
//===----------------------------------------------------------------------===//
+/// In certain cases, we may need to favor XeGPU specific distribution patterns
+/// over generic vector distribution patterns. In such cases, we can assign
+/// priorities to patterns.
+static constexpr unsigned regularPatternBenefit = 1;
+static constexpr unsigned highPatternBenefit = 2;
+
/// Helper function to get distributed vector type for a source vector type
/// according to the lane_layout. We simply divide each dimension of tensor
/// descriptor shape by corresponding lane_layout dimension. If
@@ -72,27 +78,31 @@ namespace {
/// | 32x16 | [2, 8] | 16x2 |
/// | 2x32x16 | [1, 16] | 2x32x1 |
static FailureOr<VectorType>
-getDistVecTypeBasedOnLaneLayout(xegpu::LayoutAttr layout,
+getDistVecTypeBasedOnLaneLayout(xegpu::DistributeLayoutAttr layout,
VectorType originalType) {
if (!layout)
return failure();
-
- auto laneLayout = layout.getLaneLayout().asArrayRef();
- assert(originalType.getShape().size() >= laneLayout.size() &&
+ assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
+ "Expecting a valid layout.");
+ SmallVector<int64_t> effectiveLaneLayout =
+ layout.getEffectiveLaneLayoutAsInt();
+ assert(static_cast<size_t>(originalType.getRank()) >=
+ effectiveLaneLayout.size() &&
"Rank of the original vector type should be greater or equal to the "
"size of the lane layout to distribute the vector type.");
SmallVector<int64_t> distributedShape(originalType.getShape());
// Only distribute the last `laneLayout.size()` dimensions. The remaining
// dimensions are not distributed.
- unsigned distributionStart = originalType.getRank() - laneLayout.size();
+ unsigned distributionStart =
+ originalType.getRank() - effectiveLaneLayout.size();
for (auto [i, dim] : llvm::enumerate(originalType.getShape())) {
if (i < distributionStart)
continue;
// Check if the dimension can be distributed evenly.
- if (dim % laneLayout[i - distributionStart] != 0)
+ if (dim % effectiveLaneLayout[i - distributionStart] != 0)
return failure();
- distributedShape[i] = dim / laneLayout[i - distributionStart];
+ distributedShape[i] = dim / effectiveLaneLayout[i - distributionStart];
}
return VectorType::get(distributedShape, originalType.getElementType());
}
@@ -1001,12 +1011,282 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern {
}
};
+/// Helper to rewrite a 2D VectorMultiReductionOp into a sequence of 1D
+/// VectorReductionOps.
+static Value lowerToVectorReductions(TypedValue<VectorType> src,
+ TypedValue<VectorType> acc,
+ vector::CombiningKind kind,
+ int64_t reductionDim, Location loc,
+ PatternRewriter &rewriter) {
+ // Expecting a 2D source vector.
+ assert(src.getType().getRank() == 2 && "expected a 2D source vector");
+ VectorType sourceType = src.getType();
+ int64_t sourceH = sourceType.getShape()[0];
+ int64_t sourceW = sourceType.getShape()[1];
+ int nSlices = (reductionDim == 0) ? sourceW : sourceH;
+ // Create a constant vector to hold the result of the reduction.
+ TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType());
+ Value reductionResult = arith::ConstantOp::create(
+ rewriter, loc, acc.getType(),
+ DenseElementsAttr::get(acc.getType(), zeroAttr));
+ // For each slice of the source, extract the slice vector, do a reduction
+ // and insert the reduced value back to the result vector.
+ for (int i = 0; i < nSlices; ++i) {
+ SmallVector<int64_t, 2> sliceOffsets, sliceSizes;
+ if (reductionDim == 1) {
+ sliceOffsets = {i, 0};
+ sliceSizes = {1, sourceW};
+ } else {
+ sliceOffsets = {0, i};
+ sliceSizes = {sourceH, 1};
+ }
+ vector::ExtractStridedSliceOp extractOp =
+ vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
+ sliceSizes, {1, 1});
+ int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
+ Value slice = vector::ShapeCastOp::create(
+ rewriter, loc,
+ VectorType::get({nSliceElements}, sourceType.getElementType()),
+ extractOp.getResult());
+ Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, i);
+ Value reduction =
+ vector::ReductionOp::create(rewriter, loc, kind, slice, accExtract);
+ reductionResult =
+ vector::InsertOp::create(rewriter, loc, reduction, reductionResult, i);
+ }
+ return reductionResult;
+}
+
+/// This pattern distributes the `vector.multi_reduction` operation across
+/// lanes in a warp. Currently only 2D to 1D reductions are supported. Given
+/// layouts for the source and accumulator vectors,
+/// * If the reduction dimension is distributed across lanes, the reduction is
+/// non-lane-local and the reduction is done using warp shuffles. Here we
+/// simply rewrite the MultiDimReductionOp to a sequence of ReductionOps in
+/// the warp op body.
+/// * If the reduction dimension is not distributed across lanes, the reduction
+/// is lane-local. In this case, we yield the source and accumulator vectors
+/// from the warp op and perform the lane-local reduction outside the warp op
+/// using a sequence of ReductionOps.
+/// Example 1 (Reduction is lane-local):
+/// ```
+/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1xf32>) {
+/// %0 = "some_def"() : () -> (vector<16x32xf32>)
+/// %acc = "some_def"() : () -> (vector<32xf32>)
+/// %1 = vector.multi_reduction <add>, %0, %acc [0] : vector<16x32xf32> to
+/// vector<32xf32> gpu.yield %1 : vector<32xf32>
+/// }
+/// ```
+/// is lowered to:
+/// ```
+/// %r:2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<16x1xf32>,
+/// vector<1xf32>) {
+/// %0 = "some_def"() : () -> (vector<16x32xf32>)
+/// %acc = "some_def"() : () -> (vector<32xf32>)
+/// gpu.yield %0, %acc : vector<16x32xf32>, vector<32xf32>
+/// }
+/// %c = arith.constant dense<0.0> : vector<1xf32>
+/// %1 = vector.shape_cast %r#0 : vector<16x1xf32> to vector<16xf32>
+/// %2 = vector.reduction <add>, %1, %r#1 : vector<16xf32> to f32
+/// %3 = vector.insert %2, %c[0] : f32 into vector<1xf32>
+/// ```
+/// Example 2 (Reduction is non-lane-local):
+/// ```
+/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<2xf32>) {
+/// %0 = "some_def"() : () -> (vector<2x32xf32>)
+/// %acc = "some_def"() : () -> (vector<2xf32>)
+/// %1 = vector.multi_reduction <add>, %0, %acc [1] : vector<2x32xf32> to
+/// vector<2xf32>
+/// gpu.yield %1 : vector<2xf32>
+/// }
+/// ```
+/// is lowered to:
+/// ```
+/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<2xf32>) {
+/// %0 = "some_def"() : () -> (vector<2x32xf32>)
+/// %acc = "some_def"() : () -> (vector<2xf32>)
+/// %1 = arith.constant dense<0.0> : vector<2xf32>
+/// %2 = vector.extract %0[0] : vector<32xf32> from <vector<2x32xf32>>
+/// %3 = ("warp.reduction %2") : f32
+/// %4 = vector.insert %3, %1[0] : f32 into vector<2xf32>
+/// ... repeat for row 1
+/// gpu.yield %1 : vector<2xf32>
+/// }
+struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern {
+ using gpu::WarpDistributionPattern::WarpDistributionPattern;
+ LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
+ PatternRewriter &rewriter) const override {
+ OpOperand *yieldOperand =
+ getWarpResult(warpOp, llvm::IsaPred<vector::MultiDimReductionOp>);
+ if (!yieldOperand)
+ return failure();
+ auto reductionOp =
+ cast<vector::MultiDimReductionOp>(yieldOperand->get().getDefiningOp());
+ unsigned operandNumber = yieldOperand->getOperandNumber();
+ VectorType sourceType = reductionOp.getSourceVectorType();
+ // Only 2D vectors are supported.
+ if (sourceType.getRank() != 2)
+ return rewriter.notifyMatchFailure(warpOp,
+ "Only 2D reductions are supported.");
+ ArrayRef<int64_t> reductionDims = reductionOp.getReductionDims();
+ // Only 1 reduction dimension supported. This also ensures that the result
+ // is vector type.
+ if (reductionDims.size() != 1)
+ return rewriter.notifyMatchFailure(
+ warpOp, "Only 1 reduction dimension is supported.");
+ int64_t reductionDim = reductionDims[0];
+ VectorType distributedResultType =
+ cast<VectorType>(warpOp.getResult(operandNumber).getType());
+ VectorType resultType = cast<VectorType>(reductionOp.getType());
+ xegpu::DistributeLayoutAttr sourceLayout =
+ xegpu::getDistributeLayoutAttr(reductionOp.getSource());
+
+ FailureOr<VectorType> sourceDistTypeOrFailure =
+ getDistVecTypeBasedOnLaneLayout(sourceLayout, sourceType);
+ if (failed(sourceDistTypeOrFailure))
+ return rewriter.notifyMatchFailure(
+ warpOp, "Failed to distribute the source vector type.");
+ VectorType sourceDistType = sourceDistTypeOrFailure.value();
+ // Only single dimension distribution is supported.
+ bool dim0Distributed =
+ sourceDistType.getShape()[0] != sourceType.getShape()[0];
+ bool dim1Distributed =
+ sourceDistType.getShape()[1] != sourceType.getShape()[1];
+ if (dim0Distributed && dim1Distributed)
+ return rewriter.notifyMatchFailure(
+ warpOp, "Expecting source to be distributed in a single dimension.");
+ int64_t sourceDistDim = dim0Distributed ? 0 : (dim1Distributed ? 1 : -1);
+ if (sourceDistDim == -1)
+ return rewriter.notifyMatchFailure(
+ warpOp, "Expecting a distributed source vector.");
+ bool resultDistributed =
+ distributedResultType.getNumElements() < resultType.getNumElements();
+ // If the lane owns all the data required for reduction (i.e. reduction is
+ // fully parallel across lanes), then each lane owns part of the result
+ // (i.e. result is distributed). If the reduction requires cross-lane
+ // shuffling, then the result is shared among all lanes (broadcasted).
+ // Therefore we expect following cases:
+ //
+ // | Source vector | Reduction dim | Result vector |
+ // |----------------------|----------------|----------------|
+ // | dim-0 distributed | 0 | broadcasted |
+ // | dim-0 distributed | 1 | distributed |
+ // | dim-1 distributed | 0 | distributed |
+ // | dim-1 distributed | 1 | broadcasted |
+
+ bool isReductionLaneLocal = (sourceDistDim == 0 && reductionDim == 1) ||
+ (sourceDistDim == 1 && reductionDim == 0);
+ if (isReductionLaneLocal && !resultDistributed)
+ return rewriter.notifyMatchFailure(
+ warpOp, "Expecting a distributed result for lane-local reduction.");
+
+ if (!isReductionLaneLocal && resultDistributed)
+ return rewriter.notifyMatchFailure(
+ warpOp,
+ "Expecting a broadcasted result for non-lane-local reduction.");
+
+ // Handle lane-local reduction case. In this case we fully distribute the
+ // reduction result.
+ if (isReductionLaneLocal) {
+ // Yield the source and acc vectors from the WarpOp.
+ SmallVector<size_t> newRetIndices;
+ auto newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+ rewriter, warpOp, {reductionOp.getSource(), reductionOp.getAcc()},
+ {sourceDistType, distributedResultType}, newRetIndices);
+ rewriter.setInsertionPointAfter(newWarpOp);
+ Value result = lowerToVectorReductions(
+ cast<TypedValue<VectorType>>(newWarpOp->getResult(newRetIndices[0])),
+ cast<TypedValue<VectorType>>(newWarpOp->getResult(newRetIndices[1])),
+ reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter);
+ // Replace the warp op result with the final result.
+ rewriter.replaceAllUsesWith(reductionOp.getResult(), result);
+ return success();
+ }
+ // For non-lane-local case, we simply rewrite the MultiReductionOp in terms
+ // of multiple ReductionOps. Actual distribution is done by the
+ // WarpOpReduction pattern.
+ rewriter.setInsertionPointAfter(reductionOp);
+ Value result = lowerToVectorReductions(
+ cast<TypedValue<VectorType>>(reductionOp.getSource()),
+ cast<TypedValue<VectorType>>(reductionOp.getAcc()),
+ reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter);
+ // Replace the warp op result with the final result.
+ rewriter.replaceAllUsesWith(reductionOp.getResult(), result);
+ return success();
+ }
+};
+
+/// Distribute a `vector.shape_cast` op feeding into yield op of an enclosing
+/// `gpu.warp_execute_on_lane_0` region.
+struct VectorShapeCastDistribution : public gpu::WarpDistributionPattern {
+ using gpu::WarpDistributionPattern::WarpDistributionPattern;
+ LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
+ PatternRewriter &rewriter) const override {
+ OpOperand *yieldOperand =
+ getWarpResult(warpOp, llvm::IsaPred<vector::ShapeCastOp>);
+ if (!yieldOperand)
+ return failure();
+ auto shapeCastOp =
+ cast<vector::ShapeCastOp>(yieldOperand->get().getDefiningOp());
+ unsigned operandNumber = yieldOperand->getOperandNumber();
+ auto resultDistTy =
+ cast<VectorType>(warpOp.getResult(operandNumber).getType());
+ xegpu::DistributeLayoutAttr sourceLayout =
+ xegpu::getDistributeLayoutAttr(shapeCastOp.getSource());
+ xegpu::DistributeLayoutAttr resultLayout =
+ xegpu::getDistributeLayoutAttr(shapeCastOp.getResult());
+ if (!sourceLayout || !resultLayout)
+ return rewriter.notifyMatchFailure(
+ warpOp,
+ "the source or result of shape_cast op lacks distribution layout");
+
+ // For rank reducing or increasing shape_cast ops, the lower rank layout
+ // must be a slice of higher rank layout.
+ int64_t sourceRank = shapeCastOp.getSourceVectorType().getRank();
+ int64_t resultRank = shapeCastOp.getResultVectorType().getRank();
+ if (sourceRank < resultRank && !sourceLayout.isSliceOf(resultLayout))
+ return rewriter.notifyMatchFailure(
+ warpOp, "shape_cast is rank reducing but source layout is not a "
+ "slice of result layout");
+ if (sourceRank > resultRank && !resultLayout.isSliceOf(sourceLayout))
+ return rewriter.notifyMatchFailure(
+ warpOp, "shape_cast is rank increasing but result layout is not a "
+ "slice of source layout");
+
+ FailureOr<VectorType> sourceDistTypeOrFailure =
+ getDistVecTypeBasedOnLaneLayout(sourceLayout,
+ shapeCastOp.getSourceVectorType());
+ if (failed(sourceDistTypeOrFailure))
+ return rewriter.notifyMatchFailure(
+ warpOp, "failed to get distributed vector type for source");
+ VectorType sourceDistType = sourceDistTypeOrFailure.value();
+ // Create a new warp op that yields the source of the shape_cast op.
+ SmallVector<size_t> newRetIndices;
+ auto newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+ rewriter, warpOp, {shapeCastOp.getSource()}, {sourceDistType},
+ newRetIndices);
+ rewriter.setInsertionPointAfter(newWarpOp);
+ Value source = newWarpOp.getResult(newRetIndices[0]);
+ // Create a new shape_cast op outside the warp op.
+ Value newShapeCast = vector::ShapeCastOp::create(
+ rewriter, shapeCastOp.getLoc(), resultDistTy, source);
+ rewriter.replaceAllUsesWith(newWarpOp.getResult(operandNumber),
+ newShapeCast);
+ return success();
+ }
+};
+
} // namespace
namespace {
struct XeGPUSubgroupDistributePass final
: public xegpu::impl::XeGPUSubgroupDistributeBase<
XeGPUSubgroupDistributePass> {
+ XeGPUSubgroupDistributePass() = default;
+ XeGPUSubgroupDistributePass(const XeGPUSubgroupDistributePass &other) =
+ default;
+ XeGPUSubgroupDistributePass(xegpu::XeGPUSubgroupDistributeOptions options)
+ : XeGPUSubgroupDistributeBase(options) {}
void runOnOperation() override;
};
} // namespace
@@ -1016,8 +1296,13 @@ void xegpu::populateXeGPUSubgroupDistributePatterns(
patterns
.add<CreateNdDescDistribution, StoreNdDistribution, LoadNdDistribution,
DpasDistribution, PrefetchNdDistribution, UpdateNdOffsetDistribution,
- GpuBarrierDistribution, LoadDistribution, StoreDistribution>(
- patterns.getContext());
+ GpuBarrierDistribution, VectorMultiReductionDistribution,
+ LoadDistribution, StoreDistribution>(
+ patterns.getContext(),
+ /*pattern benefit=*/regularPatternBenefit);
+ patterns.add<VectorShapeCastDistribution>(
+ patterns.getContext(),
+ /*pattern benefit=*/highPatternBenefit);
}
void XeGPUSubgroupDistributePass::runOnOperation() {
@@ -1032,8 +1317,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
if (!isa<VectorType>(operand.get().getType()))
continue;
- auto layout =
- xegpu::getDistributeLayoutAttrOfType<xegpu::LayoutAttr>(operand);
+ auto layout = xegpu::getDistributeLayoutAttr(operand.get());
if (!layout) {
op->emitError("Could not find layout attribute for operand ")
<< operand.getOperandNumber() << " of operation " << op->getName();
@@ -1074,18 +1358,15 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
if (vecRank == 0)
return AffineMap::get(val.getContext());
// Get the layout of the vector type.
- // TODO: support more layout types
- auto layout = xegpu::getDistributeLayoutAttrOfType<xegpu::LayoutAttr>(val);
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(val);
// If no layout is specified, assume the inner most dimension is distributed
// for now.
if (!layout)
return AffineMap::getMultiDimMapWithTargets(
vecRank, {static_cast<unsigned int>(vecRank - 1)}, val.getContext());
SmallVector<unsigned int> distributedDims;
- // Get the distributed dimensions based on the layout.
- ArrayRef<int> laneLayout = layout.getLaneLayout().asArrayRef();
- for (unsigned i = 0; i < laneLayout.size(); ++i) {
- if (laneLayout[i] > 1)
+ for (auto [i, v] : llvm::enumerate(layout.getEffectiveLaneLayoutAsInt())) {
+ if (v > 1)
distributedDims.push_back(i);
}
return AffineMap::getMultiDimMapWithTargets(vecRank, distributedDims,
@@ -1094,8 +1375,32 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
// TODO: shuffleFn is not used.
auto shuffleFn = [](Location loc, OpBuilder &builder, Value val, Value srcIdx,
int64_t warpSz) { return Value(); };
+
+ auto warpReduction = [](Location loc, OpBuilder &builder, Value input,
+ vector::CombiningKind kind, uint32_t size) {
+ // First reduce on a single thread to get per lane reduction value.
+ Value laneVal = builder.create<vector::ReductionOp>(loc, kind, input);
+ // Parallel reduction using butterfly shuffles.
+ for (uint64_t i = 1; i < size; i <<= 1) {
+ Value shuffled =
+ builder
+ .create<gpu::ShuffleOp>(loc, laneVal, i,
+ /*width=*/size,
+ /*mode=*/gpu::ShuffleMode::XOR)
+ .getShuffleResult();
+ laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
+ }
+ return laneVal;
+ };
+
+ if (enableSGReductions)
+ vector::populateDistributeReduction(
+ patterns, warpReduction,
+ /*pattern benefit=*/regularPatternBenefit);
+
vector::populatePropagateWarpVectorDistributionPatterns(
- patterns, distributionFn, shuffleFn);
+ patterns, distributionFn, shuffleFn,
+ /*pattern benefit=*/regularPatternBenefit);
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {
signalPassFailure();
return;
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 5d0f1d1..d7592fe 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -52,9 +52,9 @@ getSgShapeAndCount(ArrayRef<int64_t> shape,
int count = 1;
SmallVector<int64_t> sgShape(shape);
if (layout && layout.isForWorkgroup()) {
- SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
- if (!layout.getSgDataAsInt().empty())
- sgShape = layout.getSgDataAsInt();
+ SmallVector<int64_t> sgLayout = layout.getEffectiveSgLayoutAsInt();
+ if (!layout.getEffectiveSgDataAsInt().empty())
+ sgShape = layout.getEffectiveSgDataAsInt();
else if (auto maybeDerivedSgData = computeShapeRatio(shape, sgLayout))
sgShape = *maybeDerivedSgData;
SmallVector<int64_t> distUnit = computeElementwiseMul(sgLayout, sgShape);
@@ -468,6 +468,7 @@ struct WgToSgVectorBroadcastOp
LogicalResult
matchAndRewrite(vector::BroadcastOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
+
VectorType resultType = op.getResult().getType();
ArrayRef<int64_t> wgShape = resultType.getShape();
@@ -476,43 +477,24 @@ struct WgToSgVectorBroadcastOp
if (!layout || !layout.isForWorkgroup())
return failure();
- // TODO: Currently only supports cases where the source and result ranks
- // are the same.
- auto srcType =
- dyn_cast<VectorType>(adaptor.getOperands().front()[0].getType());
- if (!srcType || srcType.getRank() != resultType.getRank())
- return failure();
-
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
VectorType newResultType =
VectorType::get(sgShape, resultType.getElementType());
- // Check if the output layout is distributable
- SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
- if (sgLayout.empty())
- return failure();
-
if (!xegpu::XeGPUDialect::isEvenlyDistributable(wgShape, layout))
return failure();
- // Check if the srcShape has unit dim in dimensions being broadcasted,
- // and the other dimensions are the same as the destination type
- // TODO: Generalize it
- auto srcShape = srcType.getShape();
- for (size_t i = 0; i < srcShape.size(); ++i) {
- if (srcShape[i] != 1 && srcShape[i] != sgShape[i])
- return failure();
- }
-
SmallVector<Value> newBroadcastOps;
for (auto operand : adaptor.getOperands().front()) {
auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(),
newResultType, operand);
- xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0),
- layout.dropSgLayoutAndData());
+ if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+ !layout.getEffectiveInstDataAsInt().empty())
+ xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0),
+ layout.dropSgLayoutAndData());
+
newBroadcastOps.push_back(newBroadcast.getResult());
}
-
rewriter.replaceOpWithMultiple(op, {newBroadcastOps});
return success();
}
@@ -564,9 +546,11 @@ struct WgToSgElementwiseOp : public ConversionPattern {
// Copy all attributes, but update "layout_result_0" to drop
// sgLayout/sgData
for (auto attr : op->getAttrs()) {
- if (auto layout = dyn_cast<xegpu::LayoutAttr>(attr.getValue())) {
- if (auto newLayout = layout.dropSgLayoutAndData())
- state.addAttribute(attr.getName(), newLayout);
+ if (auto layout =
+ dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
+ if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+ !layout.getEffectiveInstDataAsInt().empty())
+ state.addAttribute(attr.getName(), layout.dropSgLayoutAndData());
} else {
state.addAttribute(attr.getName(), attr.getValue());
}
@@ -757,8 +741,10 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
auto sgAttr = DenseElementsAttr::get(newType, singleVal);
auto cstOp =
arith::ConstantOp::create(rewriter, op.getLoc(), newType, sgAttr);
- if (auto newLayout = layout.dropSgLayoutAndData())
- xegpu::setDistributeLayoutAttr(cstOp->getResult(0), newLayout);
+ if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+ !layout.getEffectiveInstDataAsInt().empty())
+ xegpu::setDistributeLayoutAttr(cstOp->getResult(0),
+ layout.dropSgLayoutAndData());
SmallVector<Value> newConsts(count, cstOp);
rewriter.replaceOpWithMultiple(op, {newConsts});
@@ -919,6 +905,128 @@ struct WgToSgStoreMatrixOp : public OpConversionPattern<xegpu::StoreMatrixOp> {
}
};
+// This pattern distributes the vector.step ops to work at subgroup level.
+// Each subgroup produces a smaller vector.step of the subgroup-local shape
+// and offsets it by the subgroup's position within the workgroup iteration
+// space, so the concatenation of all subgroup results reproduces the
+// original workgroup-level step sequence.
+struct WgToSgVectorStepOp : public OpConversionPattern<vector::StepOp> {
+  using OpConversionPattern<vector::StepOp>::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(vector::StepOp op, OneToNOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // Only ops carrying a workgroup-level distribute layout are handled.
+    xegpu::DistributeLayoutAttr layout =
+        xegpu::getDistributeLayoutAttr(op.getResult());
+    if (!layout || !layout.isForWorkgroup())
+      return failure();
+
+    Location loc = op.getLoc();
+    VectorType type = op.getResult().getType();
+    auto wgShape = type.getShape();
+    // NOTE(review): getSgShapeAndCount(...).first is assigned elsewhere
+    // directly to a SmallVector<int64_t>, so wrapping it in std::optional
+    // always yields an engaged optional — the `if (!sgShape)` check below
+    // appears unreachable; confirm and simplify.
+    std::optional<SmallVector<int64_t>> sgShape =
+        getSgShapeAndCount(wgShape, layout).first;
+    if (!sgShape)
+      return failure();
+
+    // Resolve the per-subgroup offsets from the subgroup id and the
+    // workgroup shape.
+    Value sgId =
+        gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr);
+    auto sgOffsets = layout.getOffsets(rewriter, loc, sgId, wgShape);
+    if (failed(sgOffsets))
+      return failure();
+
+    VectorType newTy = type.cloneWith(*sgShape, type.getElementType());
+    auto steps = vector::StepOp::create(rewriter, loc, newTy);
+    SmallVector<Value> newOps;
+    for (auto offsets : *sgOffsets) {
+      // Broadcast the offset scalar to a vector & add to the base steps
+      auto bcastOffset =
+          vector::BroadcastOp::create(rewriter, loc, newTy, offsets[0]);
+      auto finalSteps =
+          arith::AddIOp::create(rewriter, loc, steps, bcastOffset);
+      // Propagate the remaining (lane/inst) layout after dropping the
+      // subgroup-level components, mirroring the other WgToSg patterns.
+      if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+          !layout.getEffectiveInstDataAsInt().empty()) {
+        xegpu::setDistributeLayoutAttr(steps->getResult(0),
+                                       layout.dropSgLayoutAndData());
+        xegpu::setDistributeLayoutAttr(bcastOffset->getResult(0),
+                                       layout.dropSgLayoutAndData());
+        xegpu::setDistributeLayoutAttr(finalSteps->getResult(0),
+                                       layout.dropSgLayoutAndData());
+      }
+      newOps.push_back(finalSteps);
+    }
+
+    rewriter.replaceOpWithMultiple(op, {newOps});
+    return success();
+  }
+};
+
+// This pattern transforms vector.shape_cast ops to work at subgroup level.
+// Only casts that purely add or remove unit dimensions are supported: the
+// non-unit extents of source and (subgroup-local) result must match, and for
+// rank-changing casts the lower-rank layout must be a slice of the
+// higher-rank one.
+struct WgToSgVectorShapeCastOp
+    : public OpConversionPattern<vector::ShapeCastOp> {
+  using OpConversionPattern<vector::ShapeCastOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(vector::ShapeCastOp op, OneToNOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+
+    VectorType resultType = dyn_cast<VectorType>(op.getResult().getType());
+    if (!resultType)
+      return failure();
+
+    ArrayRef<int64_t> wgShape = resultType.getShape();
+    // Only ops carrying a workgroup-level distribute layout are handled.
+    xegpu::DistributeLayoutAttr layout =
+        xegpu::getDistributeLayoutAttr(op.getResult());
+    if (!layout || !layout.isForWorkgroup())
+      return failure();
+
+    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
+    VectorType newResultType =
+        VectorType::get(sgShape, resultType.getElementType());
+
+    // TODO: Add check for compatible layouts in layout attr.
+    auto srcType = dyn_cast<VectorType>(adaptor.getSource()[0].getType());
+    if (!srcType)
+      return failure();
+
+    // Check that shape_cast only adds/removes unit dimensions.
+    auto onlyUnitDims = [](ArrayRef<int64_t> src, ArrayRef<int64_t> dst) {
+      // Remove all 1s from both shapes and compare the rest.
+      SmallVector<int64_t> srcNonUnit, dstNonUnit;
+      for (int64_t d : src)
+        if (d != 1)
+          srcNonUnit.push_back(d);
+      for (int64_t d : dst)
+        if (d != 1)
+          dstNonUnit.push_back(d);
+      return srcNonUnit == dstNonUnit;
+    };
+
+    if (!onlyUnitDims(srcType.getShape(), sgShape))
+      return failure();
+
+    // For rank reducing or increasing shape_cast ops, the lower rank layout
+    // must be a slice of higher rank layout.
+    // NOTE(review): sourceLayout is not null-checked before calling
+    // isSliceOf below — presumably a missing source layout should bail out
+    // with failure(); confirm it cannot be null when ranks differ.
+    int64_t sourceRank = srcType.getRank();
+    int64_t resultRank = sgShape.size();
+    xegpu::DistributeLayoutAttr sourceLayout =
+        xegpu::getDistributeLayoutAttr(op.getSource());
+    if (sourceRank < resultRank && !sourceLayout.isSliceOf(layout))
+      return failure();
+    if (sourceRank > resultRank && !layout.isSliceOf(sourceLayout))
+      return failure();
+
+    // Rebuild one subgroup-local shape_cast per distributed source value,
+    // propagating the remaining (lane/inst) layout after dropping the
+    // subgroup-level components.
+    SmallVector<Value> newShapeCastOps;
+    for (auto src : adaptor.getSource()) {
+      auto newShapeCast =
+          rewriter.create<vector::ShapeCastOp>(op.getLoc(), newResultType, src);
+      if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+          !layout.getEffectiveInstDataAsInt().empty())
+        xegpu::setDistributeLayoutAttr(newShapeCast->getResult(0),
+                                       layout.dropSgLayoutAndData());
+      newShapeCastOps.push_back(newShapeCast.getResult());
+    }
+
+    rewriter.replaceOpWithMultiple(op, {newShapeCastOps});
+    return success();
+  }
+};
+
} // namespace
namespace mlir {
@@ -932,7 +1040,8 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) {
WgToSgElementwiseOp, WgToSgVectorBroadcastOp, WgToSgConvertLayoutOp,
WgToSgArithConstantOp, WgToSgLoadGatherOpWithOffset,
WgToSgStoreScatterOpWithOffset, WgToSgLoadMatrixOp,
- WgToSgStoreMatrixOp>(patterns.getContext());
+ WgToSgStoreMatrixOp, WgToSgVectorStepOp, WgToSgVectorShapeCastOp>(
+ patterns.getContext());
}
} // namespace xegpu
} // namespace mlir
@@ -1054,7 +1163,16 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
auto vecType = dyn_cast<VectorType>(op.getType());
if (!vecType)
return true;
- return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
+
+ auto layout = xegpu::getDistributeLayoutAttr(op.getResult());
+ return isLegal(layout);
+ });
+
+ target.addDynamicallyLegalOp<vector::ShapeCastOp, vector::StepOp>(
+ [=](Operation *op) -> bool {
+ // Check for either a SliceAttr or LayoutAttr on the result.
+ auto layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
+ return isLegal(layout);
});
target.addDynamicallyLegalOp<xegpu::LoadGatherOp>(
diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp
index 78c9644..a55f61a 100644
--- a/mlir/lib/IR/Remarks.cpp
+++ b/mlir/lib/IR/Remarks.cpp
@@ -248,17 +248,56 @@ RemarkEngine::initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
return success();
}
+/// Returns true if the filter pattern is already anchored like ^...$
+/// (i.e. begins with '^' and ends with '$' once surrounding whitespace
+/// is trimmed).
+static bool isAnchored(llvm::StringRef s) {
+  s = s.trim(); // ignore leading/trailing whitespace when checking anchors
+  return s.starts_with("^") && s.ends_with("$");
+}
+
+/// Anchor the entire pattern so it matches the whole string.
+/// Already-anchored patterns are returned unchanged; otherwise the filter is
+/// wrapped as "^(<filter>)$" so that any top-level alternation ('|') inside
+/// it stays grouped under the anchors.
+static std::string anchorWhole(llvm::StringRef filter) {
+  if (isAnchored(filter))
+    return filter.str();
+  return (llvm::Twine("^(") + filter + ")$").str();
+}
+
+/// Build a combined filter from cats.all and a category-specific pattern.
+/// If neither is present, return std::nullopt. Otherwise the parts are joined
+/// as "all|specific", anchored exactly once via anchorWhole(), and validated
+/// before being returned.
+static std::optional<llvm::Regex>
+buildFilter(const mlir::remark::RemarkCategories &cats,
+            const std::optional<std::string> &specific) {
+  // Collect the non-empty sub-patterns; empty strings are treated the same as
+  // absent ones so they cannot produce a degenerate "(|x)" alternation.
+  llvm::SmallVector<llvm::StringRef, 2> parts;
+  if (cats.all && !cats.all->empty())
+    parts.emplace_back(*cats.all);
+  if (specific && !specific->empty())
+    parts.emplace_back(*specific);
+
+  if (parts.empty())
+    return std::nullopt;
+
+  std::string joined = llvm::join(parts, "|");
+  std::string anchored = anchorWhole(joined);
+
+  llvm::Regex rx(anchored);
+  std::string err;
+  // NOTE(review): an invalid user pattern is silently dropped here (behaves
+  // like "no filter") and `err` is discarded — consider surfacing a
+  // diagnostic to the caller; verify intended behavior.
+  if (!rx.isValid(err))
+    return std::nullopt;
+
+  return std::make_optional<llvm::Regex>(std::move(rx));
+}
+
RemarkEngine::RemarkEngine(bool printAsEmitRemarks,
const RemarkCategories &cats)
: printAsEmitRemarks(printAsEmitRemarks) {
if (cats.passed)
- passedFilter = llvm::Regex(cats.passed.value());
+ passedFilter = buildFilter(cats, cats.passed);
if (cats.missed)
- missFilter = llvm::Regex(cats.missed.value());
+ missFilter = buildFilter(cats, cats.missed);
if (cats.analysis)
- analysisFilter = llvm::Regex(cats.analysis.value());
+ analysisFilter = buildFilter(cats, cats.analysis);
if (cats.failed)
- failedFilter = llvm::Regex(cats.failed.value());
+ failedFilter = buildFilter(cats, cats.failed);
}
llvm::LogicalResult mlir::remark::enableOptimizationRemarks(
diff --git a/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp b/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp
index 2b67035..30b8191 100644
--- a/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp
+++ b/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp
@@ -11,11 +11,11 @@
using namespace mlir;
//===----------------------------------------------------------------------===//
-// ParallelCombiningOpInterface
+// InParallelOpInterface (formerly ParallelCombiningOpInterface)
//===----------------------------------------------------------------------===//
// TODO: Single region single block interface on interfaces ?
-LogicalResult mlir::detail::verifyParallelCombiningOpInterface(Operation *op) {
+LogicalResult mlir::detail::verifyInParallelOpInterface(Operation *op) {
if (op->getNumRegions() != 1)
return op->emitError("expected single region op");
if (!op->getRegion(0).hasOneBlock())
diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp
index 7094c8e..521c7c6 100644
--- a/mlir/lib/Pass/Pass.cpp
+++ b/mlir/lib/Pass/Pass.cpp
@@ -21,11 +21,14 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/DebugLog.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include <optional>
+#define DEBUG_TYPE "pass-manager"
+
using namespace mlir;
using namespace mlir::detail;
@@ -242,6 +245,7 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) {
};
// Walk the pass list and merge adjacent adaptors.
+ LDBG(3) << "Merging adjacent adaptors in pass list";
OpToOpPassAdaptor *lastAdaptor = nullptr;
for (auto &pass : passes) {
// Check to see if this pass is an adaptor.
@@ -249,18 +253,26 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) {
// If it is the first adaptor in a possible chain, remember it and
// continue.
if (!lastAdaptor) {
+ LDBG(3) << "Found first adaptor in chain";
lastAdaptor = currentAdaptor;
continue;
}
// Otherwise, try to merge into the existing adaptor and delete the
// current one. If merging fails, just remember this as the last adaptor.
- if (succeeded(currentAdaptor->tryMergeInto(ctx, *lastAdaptor)))
+ LDBG(3) << "Attempting to merge adaptor with "
+ << currentAdaptor->getPassManagers().size()
+ << " managers into previous adaptor";
+ if (succeeded(currentAdaptor->tryMergeInto(ctx, *lastAdaptor))) {
+ LDBG(3) << "Successfully merged adaptors, removing current one";
pass.reset();
- else
+ } else {
+ LDBG(3) << "Failed to merge adaptors, keeping current as last";
lastAdaptor = currentAdaptor;
+ }
} else if (lastAdaptor) {
// If this pass isn't an adaptor, finalize it and forget the last adaptor.
+ LDBG(3) << "Finalizing adaptor chain before non-adaptor pass";
if (failed(finalizeAdaptor(lastAdaptor)))
return failure();
lastAdaptor = nullptr;
@@ -273,15 +285,26 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) {
// Now that the adaptors have been merged, erase any empty slots corresponding
// to the merged adaptors that were nulled-out in the loop above.
+ size_t beforeErase = passes.size();
llvm::erase_if(passes, std::logical_not<std::unique_ptr<Pass>>());
+ if (beforeErase != passes.size()) {
+ LDBG(3) << "Removed " << (beforeErase - passes.size())
+ << " merged adaptor slots from pass list";
+ }
// If this is a op-agnostic pass manager, there is nothing left to do.
std::optional<OperationName> rawOpName = getOpName(*ctx);
- if (!rawOpName)
+ if (!rawOpName) {
+ LDBG(3)
+ << "Op-agnostic pass manager, skipping operation-specific verification";
return success();
+ }
// Otherwise, verify that all of the passes are valid for the current
// operation anchor.
+ LDBG(3) << "Verifying " << passes.size() << " passes for operation '"
+ << getOpAnchorName() << "'";
+
std::optional<RegisteredOperationName> opName =
rawOpName->getRegisteredInfo();
for (std::unique_ptr<Pass> &pass : passes) {
@@ -292,6 +315,8 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) {
<< "'!";
}
}
+
+ LDBG(3) << "Pass list finalization completed successfully";
return success();
}
@@ -456,23 +481,45 @@ OpPassManager::Nesting OpPassManager::getNesting() { return impl->nesting; }
LogicalResult OpPassManager::initialize(MLIRContext *context,
unsigned newInitGeneration) {
- if (impl->initializationGeneration == newInitGeneration)
+
+ if (impl->initializationGeneration == newInitGeneration) {
+ LDBG(2) << "Pass manager already initialized "
+ << "' (generation " << newInitGeneration << ") with " << size()
+ << " passes";
return success();
+ }
+
+ LDBG(2) << "Initializing pass manager '" << getOpAnchorName()
+ << "' (generation " << newInitGeneration << ") with " << size()
+ << " passes";
impl->initializationGeneration = newInitGeneration;
+
for (Pass &pass : getPasses()) {
// If this pass isn't an adaptor, directly initialize it.
auto *adaptor = dyn_cast<OpToOpPassAdaptor>(&pass);
if (!adaptor) {
- if (failed(pass.initialize(context)))
+ LDBG(2) << "Initializing pass '" << pass.getName() << "'";
+ if (failed(pass.initialize(context))) {
+ LDBG(2) << "Failed to initialize pass '" << pass.getName() << "'";
return failure();
+ }
continue;
}
// Otherwise, initialize each of the adaptors pass managers.
+ LDBG(3) << "Initializing adaptor pass with "
+ << adaptor->getPassManagers().size() << " nested managers";
for (OpPassManager &adaptorPM : adaptor->getPassManagers())
- if (failed(adaptorPM.initialize(context, newInitGeneration)))
+ if (failed(adaptorPM.initialize(context, newInitGeneration))) {
+ LDBG(2) << "Failed to initialize nested pass manager";
return failure();
+ }
}
+
+ LDBG_OS([&](raw_ostream &os) {
+ os << "Pass manager initialization completed successfully: ";
+ printAsTextualPipeline(os, /*pretty=*/false);
+ });
return success();
}
@@ -499,16 +546,23 @@ llvm::hash_code OpPassManager::hash() {
LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op,
AnalysisManager am, bool verifyPasses,
unsigned parentInitGeneration) {
+ LDBG() << "Running pass '" << pass->getName() << "' on operation '"
+ << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' at "
+ << op->getLoc();
+
std::optional<RegisteredOperationName> opInfo = op->getRegisteredInfo();
- if (!opInfo)
+ if (!opInfo) {
return op->emitOpError()
<< "trying to schedule a pass on an unregistered operation";
- if (!opInfo->hasTrait<OpTrait::IsIsolatedFromAbove>())
+ }
+ if (!opInfo->hasTrait<OpTrait::IsIsolatedFromAbove>()) {
return op->emitOpError() << "trying to schedule a pass on an operation not "
"marked as 'IsolatedFromAbove'";
- if (!pass->canScheduleOn(*op->getName().getRegisteredInfo()))
+ }
+ if (!pass->canScheduleOn(*op->getName().getRegisteredInfo())) {
return op->emitOpError()
<< "trying to schedule a pass on an unsupported operation";
+ }
// Initialize the pass state with a callback for the pass to dynamically
// execute a pipeline on the currently visited operation.
@@ -526,8 +580,10 @@ LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op,
pipeline.getImpl().canScheduleOn(*op->getContext(), root->getName()));
// Before running, finalize the passes held by the pipeline.
- if (failed(pipeline.getImpl().finalizePassList(root->getContext())))
+ if (failed(pipeline.getImpl().finalizePassList(root->getContext()))) {
+ LDBG() << "Failed to finalize pass list for pipeline";
return failure();
+ }
// Initialize the user provided pipeline and execute the pipeline.
if (failed(pipeline.initialize(root->getContext(), parentInitGeneration)))
@@ -599,6 +655,13 @@ LogicalResult OpToOpPassAdaptor::runPipeline(
OpPassManager &pm, Operation *op, AnalysisManager am, bool verifyPasses,
unsigned parentInitGeneration, PassInstrumentor *instrumentor,
const PassInstrumentation::PipelineParentInfo *parentInfo) {
+ LDBG_OS([&](raw_ostream &os) {
+ os << "Running pipeline on operation '"
+ << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' with "
+ << pm.size() << " passes, verifyPasses=" << verifyPasses
+ << " pipeline: ";
+ pm.printAsTextualPipeline(os, /*pretty=*/false);
+ });
assert((!instrumentor || parentInfo) &&
"expected parent info if instrumentor is provided");
auto scopeExit = llvm::make_scope_exit([&] {
@@ -615,9 +678,14 @@ LogicalResult OpToOpPassAdaptor::runPipeline(
*parentInfo);
}
- for (Pass &pass : pm.getPasses())
- if (failed(run(&pass, op, am, verifyPasses, parentInitGeneration)))
+ for (Pass &pass : pm.getPasses()) {
+ if (failed(run(&pass, op, am, verifyPasses, parentInitGeneration))) {
+ LDBG() << "Pipeline failed for pass '" << pass.getName()
+ << "' on operation '"
+ << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "'";
return failure();
+ }
+ }
if (instrumentor) {
instrumentor->runAfterPipeline(pm.getOpName(*op->getContext()),
@@ -630,9 +698,19 @@ LogicalResult OpToOpPassAdaptor::runPipeline(
/// does not exist.
static OpPassManager *
findPassManagerWithAnchor(MutableArrayRef<OpPassManager> mgrs, StringRef name) {
+ LDBG(3) << "Looking for pass manager with anchor name '" << name << "' among "
+ << mgrs.size() << " managers";
+
auto *it = llvm::find_if(
mgrs, [&](OpPassManager &mgr) { return mgr.getOpAnchorName() == name; });
- return it == mgrs.end() ? nullptr : &*it;
+
+ if (it == mgrs.end()) {
+ LDBG(2) << "No pass manager found with anchor name '" << name << "'";
+ return nullptr;
+ }
+
+ LDBG(2) << "Found pass manager with anchor name '" << name << "'";
+ return &*it;
}
/// Find an operation pass manager that can operate on an operation of the given
@@ -640,10 +718,22 @@ findPassManagerWithAnchor(MutableArrayRef<OpPassManager> mgrs, StringRef name) {
static OpPassManager *findPassManagerFor(MutableArrayRef<OpPassManager> mgrs,
OperationName name,
MLIRContext &context) {
+ LDBG(4) << "Looking for pass manager that can handle operation '" << name
+ << "' among " << mgrs.size() << " managers";
+
auto *it = llvm::find_if(mgrs, [&](OpPassManager &mgr) {
return mgr.getImpl().canScheduleOn(context, name);
});
- return it == mgrs.end() ? nullptr : &*it;
+
+ if (it == mgrs.end()) {
+ LDBG(4) << "No pass manager found that can handle operation '" << name
+ << "'";
+ return nullptr;
+ }
+
+ LDBG(4) << "Found pass manager '" << it->getOpAnchorName()
+ << "' that can handle operation '" << name << "'";
+ return &*it;
}
OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr) {
@@ -657,6 +747,9 @@ void OpToOpPassAdaptor::getDependentDialects(DialectRegistry &dialects) const {
LogicalResult OpToOpPassAdaptor::tryMergeInto(MLIRContext *ctx,
OpToOpPassAdaptor &rhs) {
+ LDBG(3) << "Attempting to merge pass adaptor with " << mgrs.size()
+ << " managers into rhs with " << rhs.mgrs.size() << " managers";
+
// Functor used to check if a pass manager is generic, i.e. op-agnostic.
auto isGenericPM = [&](OpPassManager &pm) { return !pm.getOpName(); };
@@ -682,14 +775,24 @@ LogicalResult OpToOpPassAdaptor::tryMergeInto(MLIRContext *ctx,
//
// Check the current adaptor.
auto *lhsGenericPMIt = llvm::find_if(mgrs, isGenericPM);
- if (lhsGenericPMIt != mgrs.end() &&
- hasScheduleConflictWith(*lhsGenericPMIt, rhs.mgrs))
- return failure();
+ if (lhsGenericPMIt != mgrs.end()) {
+ LDBG(4) << "Found generic pass manager on LHS, checking for conflicts";
+ if (hasScheduleConflictWith(*lhsGenericPMIt, rhs.mgrs)) {
+ LDBG(4)
+ << "Merge failed: LHS generic pass manager has conflicts with RHS";
+ return failure();
+ }
+ }
// Check the rhs adaptor.
auto *rhsGenericPMIt = llvm::find_if(rhs.mgrs, isGenericPM);
- if (rhsGenericPMIt != rhs.mgrs.end() &&
- hasScheduleConflictWith(*rhsGenericPMIt, mgrs))
- return failure();
+ if (rhsGenericPMIt != rhs.mgrs.end()) {
+ LDBG(4) << "Found generic pass manager on RHS, checking for conflicts";
+ if (hasScheduleConflictWith(*rhsGenericPMIt, mgrs)) {
+ LDBG(4)
+ << "Merge failed: RHS generic pass manager has conflicts with LHS";
+ return failure();
+ }
+ }
for (auto &pm : mgrs) {
// If an existing pass manager exists, then merge the given pass manager
@@ -744,25 +847,51 @@ void OpToOpPassAdaptor::runOnOperation(bool verifyPasses) {
/// Run this pass adaptor synchronously.
void OpToOpPassAdaptor::runOnOperationImpl(bool verifyPasses) {
+ LDBG_OS([&](raw_ostream &os) {
+ os << "Running pass adaptor synchronously on operation '"
+ << OpWithFlags(getOperation(), OpPrintingFlags().skipRegions())
+ << "' with " << mgrs.size()
+ << " pass managers, verifyPasses=" << verifyPasses << " pipeline: ";
+ printAsTextualPipeline(os, /*pretty=*/false);
+ });
+
auto am = getAnalysisManager();
PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(),
this};
auto *instrumentor = am.getPassInstrumentor();
+
+ unsigned processedOps = 0;
for (auto &region : getOperation()->getRegions()) {
for (auto &block : region) {
for (auto &op : block) {
auto *mgr = findPassManagerFor(mgrs, op.getName(), *op.getContext());
- if (!mgr)
+ if (!mgr) {
+ LDBG(2) << "Skipping operation '"
+ << OpWithFlags(&op, OpPrintingFlags().skipRegions())
+ << "': no suitable pass manager found";
continue;
+ }
// Run the held pipeline over the current operation.
+ LDBG(2) << "Processing operation '"
+ << OpWithFlags(&op, OpPrintingFlags().skipRegions())
+ << "' with pass manager '" << mgr->getOpAnchorName() << "'";
+
unsigned initGeneration = mgr->impl->initializationGeneration;
if (failed(runPipeline(*mgr, &op, am.nest(&op), verifyPasses,
- initGeneration, instrumentor, &parentInfo)))
+ initGeneration, instrumentor, &parentInfo))) {
+ LDBG(2) << "Pipeline failed for operation '"
+ << OpWithFlags(&op, OpPrintingFlags().skipRegions()) << "'";
signalPassFailure();
+ } else {
+ processedOps++;
+ }
}
}
}
+
+ LDBG() << "Completed synchronous pass adaptor run, processed " << processedOps
+ << " operations";
}
/// Utility functor that checks if the two ranges of pass managers have a size
@@ -776,13 +905,24 @@ static bool hasSizeMismatch(ArrayRef<OpPassManager> lhs,
/// Run this pass adaptor synchronously.
void OpToOpPassAdaptor::runOnOperationAsyncImpl(bool verifyPasses) {
+ LDBG_OS([&](raw_ostream &os) {
+ os << "Running pass adaptor asynchronously on operation '"
+ << OpWithFlags(getOperation(), OpPrintingFlags().skipRegions())
+ << "' with " << mgrs.size()
+ << " pass managers, verifyPasses=" << verifyPasses << " pipeline: ";
+ printAsTextualPipeline(os, /*pretty=*/false);
+ });
+
AnalysisManager am = getAnalysisManager();
MLIRContext *context = &getContext();
// Create the async executors if they haven't been created, or if the main
// pipeline has changed.
- if (asyncExecutors.empty() || hasSizeMismatch(asyncExecutors.front(), mgrs))
+ if (asyncExecutors.empty() || hasSizeMismatch(asyncExecutors.front(), mgrs)) {
+ LDBG(2) << "Creating " << context->getThreadPool().getMaxConcurrency()
+ << " async executors";
asyncExecutors.assign(context->getThreadPool().getMaxConcurrency(), mgrs);
+ }
// This struct represents the information for a single operation to be
// scheduled on a pass manager.
@@ -803,21 +943,36 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl(bool verifyPasses) {
// operation, as well as providing a queue of operations to execute over.
std::vector<OpPMInfo> opInfos;
DenseMap<OperationName, std::optional<unsigned>> knownOpPMIdx;
+
+ LDBG(2) << "Collecting operations for async execution";
for (auto &region : getOperation()->getRegions()) {
for (Operation &op : region.getOps()) {
// Get the pass manager index for this operation type.
auto pmIdxIt = knownOpPMIdx.try_emplace(op.getName(), std::nullopt);
if (pmIdxIt.second) {
- if (auto *mgr = findPassManagerFor(mgrs, op.getName(), *context))
+ if (auto *mgr = findPassManagerFor(mgrs, op.getName(), *context)) {
pmIdxIt.first->second = std::distance(mgrs.begin(), mgr);
+ LDBG(2) << "Operation '"
+ << OpWithFlags(&op, OpPrintingFlags().skipRegions())
+ << "' will use pass manager '" << mgr->getOpAnchorName()
+ << "'";
+ }
}
// If this operation can be scheduled, add it to the list.
- if (pmIdxIt.first->second)
+ if (pmIdxIt.first->second) {
opInfos.emplace_back(*pmIdxIt.first->second, &op, am.nest(&op));
+ } else {
+ LDBG(2) << "Operation '"
+ << OpWithFlags(&op, OpPrintingFlags().skipRegions())
+ << "' skipped: no suitable pass manager";
+ }
}
}
+ LDBG(2) << "Collected " << opInfos.size()
+ << " operations for async execution";
+
// Get the current thread for this adaptor.
PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(),
this};
@@ -872,23 +1027,36 @@ void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; }
/// Run the passes within this manager on the provided operation.
LogicalResult PassManager::run(Operation *op) {
+ LDBG_OS([&](raw_ostream &os) {
+ os << "Starting PassManager run on operation '"
+ << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' with "
+ << size() << " passes, verifyPasses=" << verifyPasses << " pipeline: ";
+ printAsTextualPipeline(os, /*pretty=*/false);
+ });
+
MLIRContext *context = getContext();
std::optional<OperationName> anchorOp = getOpName(*context);
- if (anchorOp && anchorOp != op->getName())
+ if (anchorOp && anchorOp != op->getName()) {
return emitError(op->getLoc())
<< "can't run '" << getOpAnchorName() << "' pass manager on '"
<< op->getName() << "' op";
+ }
// Register all dialects for the current pipeline.
+ LDBG(2) << "Registering dependent dialects for pipeline";
DialectRegistry dependentDialects;
getDependentDialects(dependentDialects);
context->appendDialectRegistry(dependentDialects);
- for (StringRef name : dependentDialects.getDialectNames())
+ for (StringRef name : dependentDialects.getDialectNames()) {
+ LDBG(2) << "Loading dialect: " << name;
context->getOrLoadDialect(name);
+ }
// Before running, make sure to finalize the pipeline pass list.
- if (failed(getImpl().finalizePassList(context)))
+ if (failed(getImpl().finalizePassList(context))) {
+ LDBG(2) << "Pass list finalization failed";
return failure();
+ }
// Notify the context that we start running a pipeline for bookkeeping.
context->enterMultiThreadedExecution();
@@ -898,17 +1066,27 @@ LogicalResult PassManager::run(Operation *op) {
llvm::hash_code pipelineKey = hash();
if (newInitKey != initializationKey ||
pipelineKey != pipelineInitializationKey) {
- if (failed(initialize(context, impl->initializationGeneration + 1)))
+ LDBG(2) << "Initializing passes with new generation: "
+ << (impl->initializationGeneration + 1);
+ if (failed(initialize(context, impl->initializationGeneration + 1))) {
+ LDBG(2) << "Pass initialization failed";
return failure();
+ }
initializationKey = newInitKey;
pipelineInitializationKey = pipelineKey;
+ } else {
+ LDBG(2) << "Using existing pass initialization (generation: "
+ << impl->initializationGeneration << ")";
}
// Construct a top level analysis manager for the pipeline.
+ LDBG(2) << "Constructing analysis manager for pipeline execution";
ModuleAnalysisManager am(op, instrumentor.get());
// If reproducer generation is enabled, run the pass manager with crash
// handling enabled.
+ LDBG(2) << "Executing pipeline with "
+ << (crashReproGenerator ? "crash recovery" : "normal execution");
LogicalResult result =
crashReproGenerator ? runWithCrashRecovery(op, am) : runPasses(op, am);
@@ -916,8 +1094,13 @@ LogicalResult PassManager::run(Operation *op) {
context->exitMultiThreadedExecution();
// Dump all of the pass statistics if necessary.
- if (passStatisticsMode)
+ if (passStatisticsMode) {
+ LDBG(2) << "Dumping pass statistics";
dumpStatistics();
+ }
+
+ LDBG(2) << "PassManager run completed with result: "
+ << (succeeded(result) ? "success" : "failure");
return result;
}
@@ -930,6 +1113,7 @@ void PassManager::addInstrumentation(std::unique_ptr<PassInstrumentation> pi) {
}
LogicalResult PassManager::runPasses(Operation *op, AnalysisManager am) {
+ LDBG(2) << "Executing passes using OpToOpPassAdaptor pipeline";
return OpToOpPassAdaptor::runPipeline(*this, op, am, verifyPasses,
impl->initializationGeneration);
}
diff --git a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp
index ee18cf8..c27f9aa 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp
+++ b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp
@@ -86,6 +86,13 @@ Value spirv::Deserializer::getValue(uint32_t id) {
if (auto undef = getUndefType(id)) {
return spirv::UndefOp::create(opBuilder, unknownLoc, undef);
}
+ if (std::optional<spirv::GraphConstantARMOpMaterializationInfo>
+ graphConstantARMInfo = getGraphConstantARM(id)) {
+ IntegerAttr graphConstantID = graphConstantARMInfo->graphConstantID;
+ Type resultType = graphConstantARMInfo->resultType;
+ return spirv::GraphConstantARMOp::create(opBuilder, unknownLoc, resultType,
+ graphConstantID);
+ }
return valueMap.lookup(id);
}
@@ -180,6 +187,7 @@ LogicalResult spirv::Deserializer::processInstruction(
case spirv::Opcode::OpTypeStruct:
case spirv::Opcode::OpTypePointer:
case spirv::Opcode::OpTypeTensorARM:
+ case spirv::Opcode::OpTypeGraphARM:
case spirv::Opcode::OpTypeCooperativeMatrixKHR:
return processType(opcode, operands);
case spirv::Opcode::OpTypeForwardPointer:
@@ -208,12 +216,26 @@ LogicalResult spirv::Deserializer::processInstruction(
return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/true);
case spirv::Opcode::OpConstantNull:
return processConstantNull(operands);
+ case spirv::Opcode::OpGraphConstantARM:
+ return processGraphConstantARM(operands);
case spirv::Opcode::OpDecorate:
return processDecoration(operands);
case spirv::Opcode::OpMemberDecorate:
return processMemberDecoration(operands);
case spirv::Opcode::OpFunction:
return processFunction(operands);
+ case spirv::Opcode::OpGraphEntryPointARM:
+ if (deferInstructions) {
+ deferredInstructions.emplace_back(opcode, operands);
+ return success();
+ }
+ return processGraphEntryPointARM(operands);
+ case spirv::Opcode::OpGraphARM:
+ return processGraphARM(operands);
+ case spirv::Opcode::OpGraphSetOutputARM:
+ return processOpGraphSetOutputARM(operands);
+ case spirv::Opcode::OpGraphEndARM:
+ return processGraphEndARM(operands);
case spirv::Opcode::OpLabel:
return processLabel(operands);
case spirv::Opcode::OpBranch:
diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
index 3625dd2..0c3e87a 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
+++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
@@ -669,6 +669,200 @@ spirv::Deserializer::processFunctionEnd(ArrayRef<uint32_t> operands) {
return success();
}
+LogicalResult
+spirv::Deserializer::processGraphEntryPointARM(ArrayRef<uint32_t> operands) {
+ if (operands.size() < 2) {
+ return emitError(unknownLoc,
+ "missing graph defintion in OpGraphEntryPointARM");
+ }
+
+ unsigned wordIndex = 0;
+ uint32_t graphID = operands[wordIndex++];
+ if (!graphMap.contains(graphID)) {
+ return emitError(unknownLoc,
+ "missing graph definition/declaration with id ")
+ << graphID;
+ }
+
+ spirv::GraphARMOp graphARM = graphMap[graphID];
+ StringRef name = decodeStringLiteral(operands, wordIndex);
+ graphARM.setSymName(name);
+ graphARM.setEntryPoint(true);
+
+ SmallVector<Attribute, 4> interface;
+ for (int64_t size = operands.size(); wordIndex < size; ++wordIndex) {
+ if (spirv::GlobalVariableOp arg = getGlobalVariable(operands[wordIndex])) {
+ interface.push_back(SymbolRefAttr::get(arg.getOperation()));
+ } else {
+ return emitError(unknownLoc, "undefined result <id> ")
+ << operands[wordIndex] << " while decoding OpGraphEntryPoint";
+ }
+ }
+
+ // RAII guard to reset the insertion point to previous value when done.
+ OpBuilder::InsertionGuard insertionGuard(opBuilder);
+ opBuilder.setInsertionPoint(graphARM);
+ opBuilder.create<spirv::GraphEntryPointARMOp>(
+ unknownLoc, SymbolRefAttr::get(opBuilder.getContext(), name),
+ opBuilder.getArrayAttr(interface));
+
+ return success();
+}
+
+LogicalResult
+spirv::Deserializer::processGraphARM(ArrayRef<uint32_t> operands) {
+ if (curGraph) {
+ return emitError(unknownLoc, "found graph inside graph");
+ }
+ // Get the result type.
+ if (operands.size() < 2) {
+ return emitError(unknownLoc, "OpGraphARM must have at least 2 parameters");
+ }
+
+ Type type = getType(operands[0]);
+ if (!type || !isa<GraphType>(type)) {
+ return emitError(unknownLoc, "unknown graph type from <id> ")
+ << operands[0];
+ }
+ auto graphType = cast<GraphType>(type);
+ if (graphType.getNumResults() <= 0) {
+ return emitError(unknownLoc, "expected at least one result");
+ }
+
+ uint32_t graphID = operands[1];
+ if (graphMap.count(graphID)) {
+ return emitError(unknownLoc, "duplicate graph definition/declaration");
+ }
+
+ std::string graphName = getGraphSymbol(graphID);
+ auto graphOp =
+ opBuilder.create<spirv::GraphARMOp>(unknownLoc, graphName, graphType);
+ curGraph = graphMap[graphID] = graphOp;
+ Block *entryBlock = graphOp.addEntryBlock();
+ LLVM_DEBUG({
+ logger.startLine()
+ << "//===-------------------------------------------===//\n";
+ logger.startLine() << "[graph] name: " << graphName << "\n";
+ logger.startLine() << "[graph] type: " << graphType << "\n";
+ logger.startLine() << "[graph] ID: " << graphID << "\n";
+ logger.startLine() << "[graph] entry block: " << entryBlock << "\n";
+ logger.indent();
+ });
+
+ // Parse the op argument instructions.
+ for (auto [index, argType] : llvm::enumerate(graphType.getInputs())) {
+ spirv::Opcode opcode;
+ ArrayRef<uint32_t> operands;
+ if (failed(sliceInstruction(opcode, operands,
+ spirv::Opcode::OpGraphInputARM))) {
+ return failure();
+ }
+ if (operands.size() != 3) {
+ return emitError(unknownLoc, "expected result type, result <id> and "
+ "input index for OpGraphInputARM");
+ }
+
+ Type argDefinedType = getType(operands[0]);
+ if (!argDefinedType) {
+ return emitError(unknownLoc, "unknown operand type <id> ") << operands[0];
+ }
+
+ if (argDefinedType != argType) {
+ return emitError(unknownLoc,
+ "mismatch in argument type between graph type "
+ "definition ")
+ << graphType << " and argument type definition " << argDefinedType
+ << " at argument " << index;
+ }
+ if (getValue(operands[1])) {
+ return emitError(unknownLoc, "duplicate definition of result <id> ")
+ << operands[1];
+ }
+
+ IntegerAttr inputIndexAttr = getConstantInt(operands[2]);
+ if (!inputIndexAttr) {
+ return emitError(unknownLoc,
+ "unable to read inputIndex value from constant op ")
+ << operands[2];
+ }
+ BlockArgument argValue = graphOp.getArgument(inputIndexAttr.getInt());
+ valueMap[operands[1]] = argValue;
+ }
+
+ graphOutputs.resize(graphType.getNumResults());
+
+ // RAII guard to reset the insertion point to the module's region after
+ // deserializing the body of this function.
+ OpBuilder::InsertionGuard moduleInsertionGuard(opBuilder);
+
+ blockMap[graphID] = entryBlock;
+ if (failed(createGraphBlock(graphID))) {
+ return failure();
+ }
+
+ // Process all the instructions in the graph until and including
+ // OpGraphEndARM.
+ spirv::Opcode opcode;
+ ArrayRef<uint32_t> instOperands;
+ do {
+ if (failed(sliceInstruction(opcode, instOperands, std::nullopt))) {
+ return failure();
+ }
+
+ if (failed(processInstruction(opcode, instOperands))) {
+ return failure();
+ }
+ } while (opcode != spirv::Opcode::OpGraphEndARM);
+
+ return success();
+}
+
+LogicalResult
+spirv::Deserializer::processOpGraphSetOutputARM(ArrayRef<uint32_t> operands) {
+ if (operands.size() != 2) {
+ return emitError(
+ unknownLoc,
+ "expected value id and output index for OpGraphSetOutputARM");
+ }
+
+ uint32_t id = operands[0];
+ Value value = getValue(id);
+ if (!value) {
+ return emitError(unknownLoc, "could not find result <id> ") << id;
+ }
+
+ IntegerAttr outputIndexAttr = getConstantInt(operands[1]);
+ if (!outputIndexAttr) {
+ return emitError(unknownLoc,
+ "unable to read outputIndex value from constant op ")
+ << operands[1];
+ }
+ graphOutputs[outputIndexAttr.getInt()] = value;
+ return success();
+}
+
+LogicalResult
+spirv::Deserializer::processGraphEndARM(ArrayRef<uint32_t> operands) {
+ // Create GraphOutputsARM instruction.
+ opBuilder.create<spirv::GraphOutputsARMOp>(unknownLoc, graphOutputs);
+
+ // Process OpGraphEndARM.
+ if (!operands.empty()) {
+ return emitError(unknownLoc, "unexpected operands for OpGraphEndARM");
+ }
+
+ curBlock = nullptr;
+ curGraph = std::nullopt;
+ graphOutputs.clear();
+
+ LLVM_DEBUG({
+ logger.unindent();
+ logger.startLine()
+ << "//===-------------------------------------------===//\n";
+ });
+ return success();
+}
+
std::optional<std::pair<Attribute, Type>>
spirv::Deserializer::getConstant(uint32_t id) {
auto constIt = constantMap.find(id);
@@ -701,6 +895,14 @@ std::string spirv::Deserializer::getFunctionSymbol(uint32_t id) {
return funcName;
}
+std::string spirv::Deserializer::getGraphSymbol(uint32_t id) {
+ std::string graphName = nameMap.lookup(id).str();
+ if (graphName.empty()) {
+ graphName = "spirv_graph_" + std::to_string(id);
+ }
+ return graphName;
+}
+
std::string spirv::Deserializer::getSpecConstantSymbol(uint32_t id) {
auto constName = nameMap.lookup(id).str();
if (constName.empty()) {
@@ -723,6 +925,14 @@ spirv::Deserializer::createSpecConstant(Location loc, uint32_t resultID,
return op;
}
+std::optional<spirv::GraphConstantARMOpMaterializationInfo>
+spirv::Deserializer::getGraphConstantARM(uint32_t id) {
+ auto graphConstIt = graphConstantMap.find(id);
+ if (graphConstIt == graphConstantMap.end())
+ return std::nullopt;
+ return graphConstIt->getSecond();
+}
+
LogicalResult
spirv::Deserializer::processGlobalVariable(ArrayRef<uint32_t> operands) {
unsigned wordIndex = 0;
@@ -944,6 +1154,8 @@ LogicalResult spirv::Deserializer::processType(spirv::Opcode opcode,
return processMatrixType(operands);
case spirv::Opcode::OpTypeTensorARM:
return processTensorARMType(operands);
+ case spirv::Opcode::OpTypeGraphARM:
+ return processGraphTypeARM(operands);
default:
return emitError(unknownLoc, "unhandled type instruction");
}
@@ -1312,6 +1524,35 @@ spirv::Deserializer::processTensorARMType(ArrayRef<uint32_t> operands) {
}
LogicalResult
+spirv::Deserializer::processGraphTypeARM(ArrayRef<uint32_t> operands) {
+ unsigned size = operands.size();
+ if (size < 2) {
+ return emitError(unknownLoc, "OpTypeGraphARM must have at least 2 operands "
+ "(result_id, num_inputs, (inout0_type, "
+ "inout1_type, ...))")
+ << size;
+ }
+ uint32_t numInputs = operands[1];
+ SmallVector<Type, 1> argTypes;
+ SmallVector<Type, 1> returnTypes;
+ for (unsigned i = 2; i < size; ++i) {
+ Type inOutTy = getType(operands[i]);
+ if (!inOutTy) {
+ return emitError(unknownLoc,
+ "OpTypeGraphARM references undefined element type.")
+ << operands[i];
+ }
+ if (i - 2 >= numInputs) {
+ returnTypes.push_back(inOutTy);
+ } else {
+ argTypes.push_back(inOutTy);
+ }
+ }
+ typeMap[operands[0]] = GraphType::get(context, argTypes, returnTypes);
+ return success();
+}
+
+LogicalResult
spirv::Deserializer::processTypeForwardPointer(ArrayRef<uint32_t> operands) {
if (operands.size() != 2)
return emitError(unknownLoc,
@@ -1823,6 +2064,34 @@ spirv::Deserializer::processConstantNull(ArrayRef<uint32_t> operands) {
<< resultType;
}
+LogicalResult
+spirv::Deserializer::processGraphConstantARM(ArrayRef<uint32_t> operands) {
+ if (operands.size() < 3) {
+ return emitError(unknownLoc)
+ << "OpGraphConstantARM must have at least 2 operands";
+ }
+
+ Type resultType = getType(operands[0]);
+ if (!resultType) {
+ return emitError(unknownLoc, "undefined result type from <id> ")
+ << operands[0];
+ }
+
+ uint32_t resultID = operands[1];
+
+ if (!dyn_cast<spirv::TensorArmType>(resultType)) {
+ return emitError(unknownLoc, "result must be of type OpTypeTensorARM");
+ }
+
+ APInt graph_constant_id = APInt(32, operands[2], /*isSigned=*/true);
+ Type i32Ty = opBuilder.getIntegerType(32);
+ IntegerAttr attr = opBuilder.getIntegerAttr(i32Ty, graph_constant_id);
+ graphConstantMap.try_emplace(
+ resultID, GraphConstantARMOpMaterializationInfo{resultType, attr});
+
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// Control flow
//===----------------------------------------------------------------------===//
@@ -1920,6 +2189,24 @@ LogicalResult spirv::Deserializer::processLabel(ArrayRef<uint32_t> operands) {
return success();
}
+LogicalResult spirv::Deserializer::createGraphBlock(uint32_t graphID) {
+ if (!curGraph) {
+ return emitError(unknownLoc, "a graph block must appear inside a graph");
+ }
+
+ // We may have forward declared this block.
+ Block *block = getOrCreateBlock(graphID);
+ LLVM_DEBUG(logger.startLine()
+ << "[block] populating block " << block << "\n");
+ // If we have seen this block, make sure it was just a forward declaration.
+ assert(block->empty() && "re-deserialize the same block!");
+
+ opBuilder.setInsertionPointToStart(block);
+ blockMap[graphID] = curBlock = block;
+
+ return success();
+}
+
LogicalResult
spirv::Deserializer::processSelectionMerge(ArrayRef<uint32_t> operands) {
if (!curBlock) {
diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h
index db1cc3f..6027f1a 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h
+++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h
@@ -106,6 +106,13 @@ struct SpecConstOperationMaterializationInfo {
SmallVector<uint32_t> enclosedOpOperands;
};
+/// A struct that collects the info needed to materialize/emit a
+/// GraphConstantARMOp.
+struct GraphConstantARMOpMaterializationInfo {
+ Type resultType;
+ IntegerAttr graphConstantID;
+};
+
//===----------------------------------------------------------------------===//
// Deserializer Declaration
//===----------------------------------------------------------------------===//
@@ -211,9 +218,14 @@ private:
/// exists; otherwise creates one based on the <id>.
std::string getFunctionSymbol(uint32_t id);
- /// Returns a symbol to be used for the specialization constant with the given
- /// result <id>. This tries to use the specialization constant's OpName if
+ /// Returns a symbol to be used for the graph name with the given
+ /// result <id>. This tries to use the graph's OpName if
/// exists; otherwise creates one based on the <id>.
+ std::string getGraphSymbol(uint32_t id);
+
+ /// Returns a symbol to be used for the specialization constant with the
+ /// given result <id>. This tries to use the specialization constant's
+ /// OpName if exists; otherwise creates one based on the <id>.
std::string getSpecConstantSymbol(uint32_t id);
/// Gets the specialization constant with the given result <id>.
@@ -237,6 +249,11 @@ private:
spirv::SpecConstantOp createSpecConstant(Location loc, uint32_t resultID,
TypedAttr defaultValue);
+ /// Gets the GraphConstantARM ID attribute and result type with the given
+ /// result <id>.
+ std::optional<spirv::GraphConstantARMOpMaterializationInfo>
+ getGraphConstantARM(uint32_t id);
+
/// Processes the OpVariable instructions at current `offset` into `binary`.
/// It is expected that this method is used for variables that are to be
/// defined at module scope and will be deserialized into a
@@ -306,6 +323,16 @@ private:
LogicalResult processTensorARMType(ArrayRef<uint32_t> operands);
+ LogicalResult processGraphTypeARM(ArrayRef<uint32_t> operands);
+
+ LogicalResult processGraphEntryPointARM(ArrayRef<uint32_t> operands);
+
+ LogicalResult processGraphARM(ArrayRef<uint32_t> operands);
+
+ LogicalResult processOpGraphSetOutputARM(ArrayRef<uint32_t> operands);
+
+ LogicalResult processGraphEndARM(ArrayRef<uint32_t> operands);
+
LogicalResult processTypeForwardPointer(ArrayRef<uint32_t> operands);
//===--------------------------------------------------------------------===//
@@ -353,6 +380,10 @@ private:
/// Processes a SPIR-V OpConstantNull instruction with the given `operands`.
LogicalResult processConstantNull(ArrayRef<uint32_t> operands);
+ /// Processes a SPIR-V OpGraphConstantARM instruction with the given
+ /// `operands`.
+ LogicalResult processGraphConstantARM(ArrayRef<uint32_t> operands);
+
//===--------------------------------------------------------------------===//
// Debug
//===--------------------------------------------------------------------===//
@@ -450,6 +481,9 @@ private:
/// blocks declared as selection/loop headers are handled.
LogicalResult structurizeControlFlow();
+ /// Creates a block for graph with the given graphID.
+ LogicalResult createGraphBlock(uint32_t graphID);
+
//===--------------------------------------------------------------------===//
// Instruction
//===--------------------------------------------------------------------===//
@@ -546,6 +580,9 @@ private:
/// The current function under construction.
std::optional<spirv::FuncOp> curFunction;
+ /// The current graph under construction.
+ std::optional<spirv::GraphARMOp> curGraph;
+
/// The current block under construction.
Block *curBlock = nullptr;
@@ -599,12 +636,19 @@ private:
DenseMap<uint32_t, SpecConstOperationMaterializationInfo>
specConstOperationMap;
+ // Result <id> to GraphConstantARM ID attribute and result type.
+ DenseMap<uint32_t, spirv::GraphConstantARMOpMaterializationInfo>
+ graphConstantMap;
+
// Result <id> to variable mapping.
DenseMap<uint32_t, spirv::GlobalVariableOp> globalVariableMap;
// Result <id> to function mapping.
DenseMap<uint32_t, spirv::FuncOp> funcMap;
+ // Result <id> to function mapping.
+ DenseMap<uint32_t, spirv::GraphARMOp> graphMap;
+
// Result <id> to block mapping.
DenseMap<uint32_t, Block *> blockMap;
@@ -668,6 +712,9 @@ private:
/// Deserialization options.
DeserializationOptions options;
+ /// List of IDs assigned to graph outputs.
+ SmallVector<Value> graphOutputs;
+
#ifndef NDEBUG
/// A logger used to emit information during the deserialzation process.
llvm::ScopedPrinter logger;
diff --git a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp
index d62529b..e9b180a 100644
--- a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp
+++ b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp
@@ -203,6 +203,16 @@ Serializer::processSpecConstantOperationOp(spirv::SpecConstantOperationOp op) {
return success();
}
+LogicalResult
+Serializer::processGraphConstantARMOp(spirv::GraphConstantARMOp op) {
+ if (uint32_t resultID = prepareGraphConstantId(op.getLoc(), op.getType(),
+ op.getGraphConstantIdAttr())) {
+ valueIDMap[op.getResult()] = resultID;
+ return success();
+ }
+ return failure();
+}
+
LogicalResult Serializer::processUndefOp(spirv::UndefOp op) {
auto undefType = op.getType();
auto &id = undefValIDMap[undefType];
@@ -368,6 +378,118 @@ LogicalResult Serializer::processFuncOp(spirv::FuncOp op) {
return success();
}
+LogicalResult Serializer::processGraphARMOp(spirv::GraphARMOp op) {
+ if (op.getNumResults() < 1) {
+ return op.emitError("cannot serialize graph with no return types");
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "-- start graph '" << op.getName() << "' --\n");
+ assert(functionHeader.empty() && functionBody.empty());
+
+ uint32_t funcID = getOrCreateFunctionID(op.getName());
+ uint32_t fnTypeID = 0;
+ // Generate type of the function.
+ if (failed(processType(op.getLoc(), op.getFunctionType(), fnTypeID)))
+ return failure();
+ encodeInstructionInto(functionHeader, spirv::Opcode::OpGraphARM,
+ {fnTypeID, funcID});
+
+ // Declare the parameters.
+ for (auto [idx, arg] : llvm::enumerate(op.getArguments())) {
+ uint32_t argTypeID = 0;
+ SmallVector<uint32_t, 3> inputOperands;
+
+ if (failed(processType(op.getLoc(), arg.getType(), argTypeID))) {
+ return failure();
+ }
+
+ uint32_t argValueID = getNextID();
+ valueIDMap[arg] = argValueID;
+
+ auto attr = IntegerAttr::get(IntegerType::get(op.getContext(), 32), idx);
+ uint32_t indexID = prepareConstantInt(op.getLoc(), attr, false);
+
+ inputOperands.push_back(argTypeID);
+ inputOperands.push_back(argValueID);
+ inputOperands.push_back(indexID);
+
+ encodeInstructionInto(functionHeader, spirv::Opcode::OpGraphInputARM,
+ inputOperands);
+ }
+
+ if (failed(processBlock(&op.front(), /*omitLabel=*/true)))
+ return failure();
+ if (failed(visitInPrettyBlockOrder(
+ &op.front(), [&](Block *block) { return processBlock(block); },
+ /*skipHeader=*/true))) {
+ return failure();
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "-- completed graph '" << op.getName()
+ << "' --\n");
+ // Insert OpGraphEndARM.
+ encodeInstructionInto(functionBody, spirv::Opcode::OpGraphEndARM, {});
+
+ llvm::append_range(graphs, functionHeader);
+ llvm::append_range(graphs, functionBody);
+ functionHeader.clear();
+ functionBody.clear();
+
+ return success();
+}
+
+LogicalResult
+Serializer::processGraphEntryPointARMOp(spirv::GraphEntryPointARMOp op) {
+ SmallVector<uint32_t, 4> operands;
+ StringRef graph = op.getFn();
+ // Add the graph <id>.
+ uint32_t graphID = getOrCreateFunctionID(graph);
+ operands.push_back(graphID);
+ // Add the name of the graph.
+ spirv::encodeStringLiteralInto(operands, graph);
+
+ // Add the interface values.
+ if (ArrayAttr interface = op.getInterface()) {
+ for (Attribute var : interface.getValue()) {
+ StringRef value = cast<FlatSymbolRefAttr>(var).getValue();
+ if (uint32_t id = getVariableID(value)) {
+ operands.push_back(id);
+ } else {
+ return op.emitError(
+ "referencing undefined global variable."
+ "spirv.GraphEntryPointARM is at the end of spirv.module. All "
+ "referenced variables should already be defined");
+ }
+ }
+ }
+ encodeInstructionInto(graphs, spirv::Opcode::OpGraphEntryPointARM, operands);
+ return success();
+}
+
+LogicalResult
+Serializer::processGraphOutputsARMOp(spirv::GraphOutputsARMOp op) {
+ for (auto [idx, value] : llvm::enumerate(op->getOperands())) {
+ SmallVector<uint32_t, 2> outputOperands;
+
+ Type resType = value.getType();
+ uint32_t resTypeID = 0;
+ if (failed(processType(op.getLoc(), resType, resTypeID))) {
+ return failure();
+ }
+
+ uint32_t outputID = getValueID(value);
+ auto attr = IntegerAttr::get(IntegerType::get(op.getContext(), 32), idx);
+ uint32_t indexID = prepareConstantInt(op.getLoc(), attr, false);
+
+ outputOperands.push_back(outputID);
+ outputOperands.push_back(indexID);
+
+ encodeInstructionInto(functionBody, spirv::Opcode::OpGraphSetOutputARM,
+ outputOperands);
+ }
+ return success();
+}
+
LogicalResult Serializer::processVariableOp(spirv::VariableOp op) {
SmallVector<uint32_t, 4> operands;
SmallVector<StringRef, 2> elidedAttrs;
diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
index 7fc7795..b56e778 100644
--- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
+++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
@@ -136,7 +136,7 @@ void Serializer::collect(SmallVectorImpl<uint32_t> &binary) {
extensions.size() + extendedSets.size() +
memoryModel.size() + entryPoints.size() +
executionModes.size() + decorations.size() +
- typesGlobalValues.size() + functions.size();
+ typesGlobalValues.size() + functions.size() + graphs.size();
binary.clear();
binary.reserve(moduleSize);
@@ -154,6 +154,7 @@ void Serializer::collect(SmallVectorImpl<uint32_t> &binary) {
binary.append(decorations.begin(), decorations.end());
binary.append(typesGlobalValues.begin(), typesGlobalValues.end());
binary.append(functions.begin(), functions.end());
+ binary.append(graphs.begin(), graphs.end());
}
#ifndef NDEBUG
@@ -509,6 +510,9 @@ Serializer::processTypeImpl(Location loc, Type type, uint32_t &typeID,
if ((isa<FunctionType>(type) &&
succeeded(prepareFunctionType(loc, cast<FunctionType>(type), typeEnum,
operands))) ||
+ (isa<GraphType>(type) &&
+ succeeded(
+ prepareGraphType(loc, cast<GraphType>(type), typeEnum, operands))) ||
succeeded(prepareBasicType(loc, type, typeID, typeEnum, operands,
deferSerialization, serializationCtx))) {
if (deferSerialization)
@@ -539,7 +543,7 @@ Serializer::processTypeImpl(Location loc, Type type, uint32_t &typeID,
return success();
}
- return failure();
+ return emitError(loc, "failed to process type: ") << type;
}
LogicalResult Serializer::prepareBasicType(
@@ -875,6 +879,33 @@ Serializer::prepareFunctionType(Location loc, FunctionType type,
return success();
}
+LogicalResult
+Serializer::prepareGraphType(Location loc, GraphType type,
+ spirv::Opcode &typeEnum,
+ SmallVectorImpl<uint32_t> &operands) {
+ typeEnum = spirv::Opcode::OpTypeGraphARM;
+ assert(type.getNumResults() >= 1 &&
+ "serialization requires at least a return value");
+
+ operands.push_back(type.getNumInputs());
+
+ for (Type argType : type.getInputs()) {
+ uint32_t argTypeID = 0;
+ if (failed(processType(loc, argType, argTypeID)))
+ return failure();
+ operands.push_back(argTypeID);
+ }
+
+ for (Type resType : type.getResults()) {
+ uint32_t resTypeID = 0;
+ if (failed(processType(loc, resType, resTypeID)))
+ return failure();
+ operands.push_back(resTypeID);
+ }
+
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// Constant
//===----------------------------------------------------------------------===//
@@ -1135,6 +1166,41 @@ uint32_t Serializer::prepareConstantInt(Location loc, IntegerAttr intAttr,
return resultID;
}
+uint32_t Serializer::prepareGraphConstantId(Location loc, Type graphConstType,
+ IntegerAttr intAttr) {
+ // De-duplicate graph constants.
+ if (uint32_t id = getGraphConstantARMId(intAttr)) {
+ return id;
+ }
+
+ // Process the type for this graph constant.
+ uint32_t typeID = 0;
+ if (failed(processType(loc, graphConstType, typeID))) {
+ return 0;
+ }
+
+ uint32_t resultID = getNextID();
+ APInt value = intAttr.getValue();
+ unsigned bitwidth = value.getBitWidth();
+ if (bitwidth > 32) {
+ emitError(loc, "Too wide attribute for OpGraphConstantARM: ")
+ << bitwidth << " bits";
+ return 0;
+ }
+ bool isSigned = value.isSignedIntN(bitwidth);
+
+ uint32_t word = 0;
+ if (isSigned) {
+ word = static_cast<int32_t>(value.getSExtValue());
+ } else {
+ word = static_cast<uint32_t>(value.getZExtValue());
+ }
+ encodeInstructionInto(typesGlobalValues, spirv::Opcode::OpGraphConstantARM,
+ {typeID, resultID, word});
+ graphConstIDMap[intAttr] = resultID;
+ return resultID;
+}
+
uint32_t Serializer::prepareConstantFp(Location loc, FloatAttr floatAttr,
bool isSpec) {
if (!isSpec) {
@@ -1469,9 +1535,19 @@ LogicalResult Serializer::processOperation(Operation *opInst) {
return processConstantCompositeReplicateOp(op);
})
.Case([&](spirv::FuncOp op) { return processFuncOp(op); })
+ .Case([&](spirv::GraphARMOp op) { return processGraphARMOp(op); })
+ .Case([&](spirv::GraphEntryPointARMOp op) {
+ return processGraphEntryPointARMOp(op);
+ })
+ .Case([&](spirv::GraphOutputsARMOp op) {
+ return processGraphOutputsARMOp(op);
+ })
.Case([&](spirv::GlobalVariableOp op) {
return processGlobalVariableOp(op);
})
+ .Case([&](spirv::GraphConstantARMOp op) {
+ return processGraphConstantARMOp(op);
+ })
.Case([&](spirv::LoopOp op) { return processLoopOp(op); })
.Case([&](spirv::ReferenceOfOp op) { return processReferenceOfOp(op); })
.Case([&](spirv::SelectionOp op) { return processSelectionOp(op); })
diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.h b/mlir/lib/Target/SPIRV/Serialization/Serializer.h
index fb2cecd..add372b 100644
--- a/mlir/lib/Target/SPIRV/Serialization/Serializer.h
+++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.h
@@ -122,6 +122,8 @@ private:
LogicalResult
processSpecConstantOperationOp(spirv::SpecConstantOperationOp op);
+ LogicalResult processGraphConstantARMOp(spirv::GraphConstantARMOp op);
+
/// SPIR-V dialect supports OpUndef using spirv.UndefOp that produces a SSA
/// value to use with other operations. The SPIR-V spec recommends that
/// OpUndef be generated at module level. The serialization generates an
@@ -135,6 +137,15 @@ private:
LogicalResult processFuncOp(spirv::FuncOp op);
LogicalResult processFuncParameter(spirv::FuncOp op);
+ /// Processes a SPIR-V GraphARM op.
+ LogicalResult processGraphARMOp(spirv::GraphARMOp op);
+
+ /// Processes a SPIR-V GraphEntryPointARM op.
+ LogicalResult processGraphEntryPointARMOp(spirv::GraphEntryPointARMOp op);
+
+ /// Processes a SPIR-V GraphOutputsARMOp op.
+ LogicalResult processGraphOutputsARMOp(spirv::GraphOutputsARMOp op);
+
LogicalResult processVariableOp(spirv::VariableOp op);
/// Process a SPIR-V GlobalVariableOp
@@ -189,6 +200,10 @@ private:
spirv::Opcode &typeEnum,
SmallVectorImpl<uint32_t> &operands);
+ LogicalResult prepareGraphType(Location loc, GraphType type,
+ spirv::Opcode &typeEnum,
+ SmallVectorImpl<uint32_t> &operands);
+
//===--------------------------------------------------------------------===//
// Constant
//===--------------------------------------------------------------------===//
@@ -238,6 +253,13 @@ private:
uint32_t prepareConstantInt(Location loc, IntegerAttr intAttr,
bool isSpec = false);
+ uint32_t getGraphConstantARMId(Attribute value) const {
+ return graphConstIDMap.lookup(value);
+ }
+
+ uint32_t prepareGraphConstantId(Location loc, Type graphConstType,
+ IntegerAttr intAttr);
+
uint32_t prepareConstantFp(Location loc, FloatAttr floatAttr,
bool isSpec = false);
@@ -372,6 +394,7 @@ private:
SmallVector<uint32_t, 0> decorations;
SmallVector<uint32_t, 0> typesGlobalValues;
SmallVector<uint32_t, 0> functions;
+ SmallVector<uint32_t, 0> graphs;
/// Recursive struct references are serialized as OpTypePointer instructions
/// to the recursive struct type. However, the OpTypePointer instruction
@@ -388,15 +411,22 @@ private:
recursiveStructInfos;
/// `functionHeader` contains all the instructions that must be in the first
- /// block in the function, and `functionBody` contains the rest. After
- /// processing FuncOp, the encoded instructions of a function are appended to
- /// `functions`. An example of instructions in `functionHeader` in order:
+ /// block in the function or graph, and `functionBody` contains the rest.
+ /// After processing FuncOp/GraphARMOp, the encoded instructions of a function
+ /// or graph are appended to `functions` or `graphs` respectively. Examples of
+ /// instructions in `functionHeader` in order:
+ ///
+ /// For a FuncOp:
/// OpFunction ...
/// OpFunctionParameter ...
/// OpFunctionParameter ...
/// OpLabel ...
/// OpVariable ...
/// OpVariable ...
+ ///
+ /// For a GraphARMOp
+ /// OpGraphARM ...
+ /// OpGraphInputARM ...
SmallVector<uint32_t, 0> functionHeader;
SmallVector<uint32_t, 0> functionBody;
@@ -412,6 +442,9 @@ private:
/// Map from specialization constant names to their <id>s.
llvm::StringMap<uint32_t> specConstIDMap;
+ /// Map from graph constant ID value to their <id>s.
+ DenseMap<Attribute, uint32_t> graphConstIDMap;
+
/// Map from GlobalVariableOps name to <id>s.
llvm::StringMap<uint32_t> globalVarIDMap;
diff --git a/mlir/lib/Tools/mlir-opt/CMakeLists.txt b/mlir/lib/Tools/mlir-opt/CMakeLists.txt
index f24d4c6..858c9c1 100644
--- a/mlir/lib/Tools/mlir-opt/CMakeLists.txt
+++ b/mlir/lib/Tools/mlir-opt/CMakeLists.txt
@@ -13,4 +13,5 @@ add_mlir_library(MLIROptLib
MLIRPluginsLib
MLIRSupport
MLIRIRDL
+ MLIRRemarkStreamer
)
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index de714d8b..4f3b2ed 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -23,9 +23,11 @@
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Remarks.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
+#include "mlir/Remark/RemarkStreamer.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Support/Timing.h"
#include "mlir/Support/ToolUtilities.h"
@@ -33,6 +35,7 @@
#include "mlir/Tools/Plugins/DialectPlugin.h"
#include "mlir/Tools/Plugins/PassPlugin.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/LogicalResult.h"
@@ -204,6 +207,58 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
cl::location(generateReproducerFileFlag), cl::init(""),
cl::value_desc("filename"));
+ static cl::OptionCategory remarkCategory(
+ "Remark Options",
+ "Filter remarks by regular expression (llvm::Regex syntax).");
+
+ static llvm::cl::opt<RemarkFormat, /*ExternalStorage=*/true> remarkFormat{
+ "remark-format",
+ llvm::cl::desc("Specify the format for remark output."),
+ cl::location(remarkFormatFlag),
+ llvm::cl::value_desc("format"),
+ llvm::cl::init(REMARK_FORMAT_STDOUT),
+ llvm::cl::values(
+ clEnumValN(REMARK_FORMAT_STDOUT, "emitRemark",
+ "Print as emitRemark to command-line"),
+ clEnumValN(REMARK_FORMAT_YAML, "yaml", "Print yaml file"),
+ clEnumValN(REMARK_FORMAT_BITSTREAM, "bitstream",
+ "Print bitstream file")),
+ llvm::cl::cat(remarkCategory)};
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksAll(
+ "remarks-filter",
+ cl::desc("Show all remarks: passed, missed, failed, analysis"),
+ cl::location(remarksAllFilterFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksFile(
+ "remarks-output-file",
+ cl::desc(
+ "Output file for yaml and bitstream remark formats. Default is "
+ "mlir-remarks.yaml or mlir-remarks.bitstream"),
+ cl::location(remarksOutputFileFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksPassed(
+ "remarks-filter-passed", cl::desc("Show passed remarks"),
+ cl::location(remarksPassedFilterFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksFailed(
+ "remarks-filter-failed", cl::desc("Show failed remarks"),
+ cl::location(remarksFailedFilterFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksMissed(
+ "remarks-filter-missed", cl::desc("Show missed remarks"),
+ cl::location(remarksMissedFilterFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
+ static cl::opt<std::string, /*ExternalStorage=*/true> remarksAnalyse(
+ "remarks-filter-analyse", cl::desc("Show analysis remarks"),
+ cl::location(remarksAnalyseFilterFlag), cl::init(""),
+ cl::cat(remarkCategory));
+
/// Set the callback to load a pass plugin.
passPlugins.setCallback([&](const std::string &pluginPath) {
auto plugin = PassPlugin::load(pluginPath);
@@ -241,23 +296,23 @@ public:
setHandler([verbosityLevel, showNotes](Diagnostic &diag) {
auto severity = diag.getSeverity();
switch (severity) {
- case DiagnosticSeverity::Error:
+ case mlir::DiagnosticSeverity::Error:
// failure indicates that the error is not handled by the filter and
// goes through to the default handler. Therefore, the error can be
// successfully printed.
return failure();
- case DiagnosticSeverity::Warning:
+ case mlir::DiagnosticSeverity::Warning:
if (verbosityLevel == VerbosityLevel::ErrorsOnly)
return success();
else
return failure();
- case DiagnosticSeverity::Remark:
+ case mlir::DiagnosticSeverity::Remark:
if (verbosityLevel == VerbosityLevel::ErrorsOnly ||
verbosityLevel == VerbosityLevel::ErrorsAndWarnings)
return success();
else
return failure();
- case DiagnosticSeverity::Note:
+ case mlir::DiagnosticSeverity::Note:
if (showNotes)
return failure();
else
@@ -462,6 +517,41 @@ performActions(raw_ostream &os,
context->enableMultithreading(wasThreadingEnabled);
+ remark::RemarkCategories cats{
+ config.getRemarksAllFilter(), config.getRemarksPassedFilter(),
+ config.getRemarksMissedFilter(), config.getRemarksAnalyseFilter(),
+ config.getRemarksFailedFilter()};
+
+ mlir::MLIRContext &ctx = *context;
+
+ switch (config.getRemarkFormat()) {
+ case REMARK_FORMAT_STDOUT:
+ if (failed(mlir::remark::enableOptimizationRemarks(
+ ctx, nullptr, cats, true /*printAsEmitRemarks*/)))
+ return failure();
+ break;
+
+ case REMARK_FORMAT_YAML: {
+ std::string file = config.getRemarksOutputFile().empty()
+ ? "mlir-remarks.yaml"
+ : config.getRemarksOutputFile();
+ if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer(
+ ctx, file, llvm::remarks::Format::YAML, cats)))
+ return failure();
+ break;
+ }
+
+ case REMARK_FORMAT_BITSTREAM: {
+ std::string file = config.getRemarksOutputFile().empty()
+ ? "mlir-remarks.bitstream"
+ : config.getRemarksOutputFile();
+ if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer(
+ ctx, file, llvm::remarks::Format::Bitstream, cats)))
+ return failure();
+ break;
+ }
+ }
+
// Prepare the pass manager, applying command-line and reproducer options.
PassManager pm(op.get()->getName(), PassManager::Nesting::Implicit);
pm.enableVerifier(config.shouldVerifyPasses());
@@ -523,8 +613,8 @@ processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
SMLoc());
sourceMgr->AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());
- // Create a context just for the current buffer. Disable threading on creation
- // since we'll inject the thread-pool separately.
+ // Create a context just for the current buffer. Disable threading on
+ // creation since we'll inject the thread-pool separately.
MLIRContext context(registry, MLIRContext::Threading::DISABLED);
if (threadPool)
context.setThreadPool(*threadPool);
@@ -669,9 +759,9 @@ LogicalResult mlir::MlirOptMain(int argc, char **argv,
if (config.shouldListPasses())
return printRegisteredPassesAndReturn();
- // When reading from stdin and the input is a tty, it is often a user mistake
- // and the process "appears to be stuck". Print a message to let the user know
- // about it!
+ // When reading from stdin and the input is a tty, it is often a user
+ // mistake and the process "appears to be stuck". Print a message to let the
+ // user know about it!
if (inputFilename == "-" &&
sys::Process::FileDescriptorIsDisplayed(fileno(stdin)))
llvm::errs() << "(processing input from stdin now, hit ctrl-c/ctrl-d to "
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index df9700f..f7565cf 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -3098,8 +3098,154 @@ unsigned OperationLegalizer::applyCostModelToPatterns(
}
//===----------------------------------------------------------------------===//
+// Reconcile Unrealized Casts
+//===----------------------------------------------------------------------===//
+
+/// Try to reconcile all given UnrealizedConversionCastOps and store the
+/// left-over ops in `remainingCastOps` (if provided). See documentation in
+/// DialectConversion.h for more details.
+/// The `isCastOpOfInterestFn` is used to filter the cast ops to process: the
+/// algorithm may visit an operand (or user) that is a cast op, but will not
+/// try to reconcile it if it is not in the filtered set.
+template <typename RangeT>
+static void reconcileUnrealizedCastsImpl(
+ RangeT castOps,
+ function_ref<bool(UnrealizedConversionCastOp)> isCastOpOfInterestFn,
+ SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps) {
+ // A worklist of cast ops to process.
+ SetVector<UnrealizedConversionCastOp> worklist(llvm::from_range, castOps);
+
+ // Helper function that returns the unrealized_conversion_cast op that
+ // defines all inputs of the given op (in the same order). Returns "nullptr"
+ // if there is no such op.
+ auto getInputCast =
+ [](UnrealizedConversionCastOp castOp) -> UnrealizedConversionCastOp {
+ if (castOp.getInputs().empty())
+ return {};
+ auto inputCastOp =
+ castOp.getInputs().front().getDefiningOp<UnrealizedConversionCastOp>();
+ if (!inputCastOp)
+ return {};
+ if (inputCastOp.getOutputs() != castOp.getInputs())
+ return {};
+ return inputCastOp;
+ };
+
+ // Process ops in the worklist bottom-to-top.
+ while (!worklist.empty()) {
+ UnrealizedConversionCastOp castOp = worklist.pop_back_val();
+
+ // Traverse the chain of input cast ops to see if an op with the same
+ // input types can be found.
+ UnrealizedConversionCastOp nextCast = castOp;
+ while (nextCast) {
+ if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) {
+ if (llvm::any_of(nextCast.getInputs(), [&](Value v) {
+ return v.getDefiningOp() == castOp;
+ })) {
+ // Ran into a cycle.
+ break;
+ }
+
+ // Found a cast where the input types match the output types of the
+ // matched op. We can directly use those inputs.
+ castOp.replaceAllUsesWith(nextCast.getInputs());
+ break;
+ }
+ nextCast = getInputCast(nextCast);
+ }
+ }
+
+ // A set of all alive cast ops. I.e., ops whose results are (transitively)
+ // used by an op that is not a cast op.
+ DenseSet<Operation *> liveOps;
+
+ // Helper function that marks the given op and transitively reachable input
+ // cast ops as alive.
+ auto markOpLive = [&](Operation *rootOp) {
+ SmallVector<Operation *> worklist;
+ worklist.push_back(rootOp);
+ while (!worklist.empty()) {
+ Operation *op = worklist.pop_back_val();
+ if (liveOps.insert(op).second) {
+ // Successfully inserted: process reachable input cast ops.
+ for (Value v : op->getOperands())
+ if (auto castOp = v.getDefiningOp<UnrealizedConversionCastOp>())
+ if (isCastOpOfInterestFn(castOp))
+ worklist.push_back(castOp);
+ }
+ }
+ };
+
+ // Find all alive cast ops.
+ for (UnrealizedConversionCastOp op : castOps) {
+ // The op may have been marked live already as being an operand of another
+ // live cast op.
+ if (liveOps.contains(op.getOperation()))
+ continue;
+ // If any of the users is not a cast op, mark the current op (and its
+ // input ops) as live.
+ if (llvm::any_of(op->getUsers(), [&](Operation *user) {
+ auto castOp = dyn_cast<UnrealizedConversionCastOp>(user);
+ return !castOp || !isCastOpOfInterestFn(castOp);
+ }))
+ markOpLive(op);
+ }
+
+ // Erase all dead cast ops.
+ for (UnrealizedConversionCastOp op : castOps) {
+ if (liveOps.contains(op)) {
+ // Op is alive and was not erased. Add it to the remaining cast ops.
+ if (remainingCastOps)
+ remainingCastOps->push_back(op);
+ continue;
+ }
+
+ // Op is dead. Erase it.
+ op->dropAllUses();
+ op->erase();
+ }
+}
+
+void mlir::reconcileUnrealizedCasts(
+ ArrayRef<UnrealizedConversionCastOp> castOps,
+ SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps) {
+ // Set of all cast ops for faster lookups.
+ DenseSet<UnrealizedConversionCastOp> castOpSet;
+ for (UnrealizedConversionCastOp op : castOps)
+ castOpSet.insert(op);
+ reconcileUnrealizedCasts(castOpSet, remainingCastOps);
+}
+
+void mlir::reconcileUnrealizedCasts(
+ const DenseSet<UnrealizedConversionCastOp> &castOps,
+ SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps) {
+ reconcileUnrealizedCastsImpl(
+ llvm::make_range(castOps.begin(), castOps.end()),
+ [&](UnrealizedConversionCastOp castOp) {
+ return castOps.contains(castOp);
+ },
+ remainingCastOps);
+}
+
+namespace mlir {
+static void reconcileUnrealizedCasts(
+ const DenseMap<UnrealizedConversionCastOp, UnresolvedMaterializationInfo>
+ &castOps,
+ SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps) {
+ reconcileUnrealizedCastsImpl(
+ castOps.keys(),
+ [&](UnrealizedConversionCastOp castOp) {
+ return castOps.contains(castOp);
+ },
+ remainingCastOps);
+}
+} // namespace mlir
+
+//===----------------------------------------------------------------------===//
// OperationConverter
//===----------------------------------------------------------------------===//
+
namespace {
enum OpConversionMode {
/// In this mode, the conversion will ignore failed conversions to allow
@@ -3264,18 +3410,13 @@ LogicalResult OperationConverter::convertOperations(ArrayRef<Operation *> ops) {
// After a successful conversion, apply rewrites.
rewriterImpl.applyRewrites();
- // Gather all unresolved materializations.
- SmallVector<UnrealizedConversionCastOp> allCastOps;
- const DenseMap<UnrealizedConversionCastOp, UnresolvedMaterializationInfo>
- &materializations = rewriterImpl.unresolvedMaterializations;
- for (auto it : materializations)
- allCastOps.push_back(it.first);
-
// Reconcile all UnrealizedConversionCastOps that were inserted by the
- // dialect conversion frameworks. (Not the one that were inserted by
+ // dialect conversion frameworks. (Not the ones that were inserted by
// patterns.)
+ const DenseMap<UnrealizedConversionCastOp, UnresolvedMaterializationInfo>
+ &materializations = rewriterImpl.unresolvedMaterializations;
SmallVector<UnrealizedConversionCastOp> remainingCastOps;
- reconcileUnrealizedCasts(allCastOps, &remainingCastOps);
+ reconcileUnrealizedCasts(materializations, &remainingCastOps);
// Drop markers.
for (UnrealizedConversionCastOp castOp : remainingCastOps)
@@ -3300,79 +3441,6 @@ LogicalResult OperationConverter::convertOperations(ArrayRef<Operation *> ops) {
}
//===----------------------------------------------------------------------===//
-// Reconcile Unrealized Casts
-//===----------------------------------------------------------------------===//
-
-void mlir::reconcileUnrealizedCasts(
- ArrayRef<UnrealizedConversionCastOp> castOps,
- SmallVectorImpl<UnrealizedConversionCastOp> *remainingCastOps) {
- SetVector<UnrealizedConversionCastOp> worklist(llvm::from_range, castOps);
- // This set is maintained only if `remainingCastOps` is provided.
- DenseSet<Operation *> erasedOps;
-
- // Helper function that adds all operands to the worklist that are an
- // unrealized_conversion_cast op result.
- auto enqueueOperands = [&](UnrealizedConversionCastOp castOp) {
- for (Value v : castOp.getInputs())
- if (auto inputCastOp = v.getDefiningOp<UnrealizedConversionCastOp>())
- worklist.insert(inputCastOp);
- };
-
- // Helper function that return the unrealized_conversion_cast op that
- // defines all inputs of the given op (in the same order). Return "nullptr"
- // if there is no such op.
- auto getInputCast =
- [](UnrealizedConversionCastOp castOp) -> UnrealizedConversionCastOp {
- if (castOp.getInputs().empty())
- return {};
- auto inputCastOp =
- castOp.getInputs().front().getDefiningOp<UnrealizedConversionCastOp>();
- if (!inputCastOp)
- return {};
- if (inputCastOp.getOutputs() != castOp.getInputs())
- return {};
- return inputCastOp;
- };
-
- // Process ops in the worklist bottom-to-top.
- while (!worklist.empty()) {
- UnrealizedConversionCastOp castOp = worklist.pop_back_val();
- if (castOp->use_empty()) {
- // DCE: If the op has no users, erase it. Add the operands to the
- // worklist to find additional DCE opportunities.
- enqueueOperands(castOp);
- if (remainingCastOps)
- erasedOps.insert(castOp.getOperation());
- castOp->erase();
- continue;
- }
-
- // Traverse the chain of input cast ops to see if an op with the same
- // input types can be found.
- UnrealizedConversionCastOp nextCast = castOp;
- while (nextCast) {
- if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) {
- // Found a cast where the input types match the output types of the
- // matched op. We can directly use those inputs and the matched op can
- // be removed.
- enqueueOperands(castOp);
- castOp.replaceAllUsesWith(nextCast.getInputs());
- if (remainingCastOps)
- erasedOps.insert(castOp.getOperation());
- castOp->erase();
- break;
- }
- nextCast = getInputCast(nextCast);
- }
- }
-
- if (remainingCastOps)
- for (UnrealizedConversionCastOp op : castOps)
- if (!erasedOps.contains(op.getOperation()))
- remainingCastOps->push_back(op);
-}
-
-//===----------------------------------------------------------------------===//
// Type Conversion
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir b/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir
index aae2b1d..a92321d 100644..100755
--- a/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir
@@ -4,8 +4,11 @@
// CHECK-SAME: (%[[ARG0:.*]]: i32)
func.func @test_permlane16_i32(%arg0 : i32) -> i32 {
// CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ARG0]], %[[ARG0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: return %[[RES]] : i32
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: return %[[SEL]] : i32
%0 = amdgpu.permlane_swap %arg0 16 : i32
return %0 : i32
}
@@ -14,8 +17,11 @@ func.func @test_permlane16_i32(%arg0 : i32) -> i32 {
// CHECK-SAME: (%[[ARG0:.*]]: i32)
func.func @test_permlane16_i32_optional_attr(%arg0 : i32) -> i32 {
// CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ARG0]], %[[ARG0]], true, true : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: return %[[RES]] : i32
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: return %[[SEL]] : i32
%0 = amdgpu.permlane_swap %arg0 16 { fetch_inactive = true, bound_ctrl = true } : i32
return %0 : i32
}
@@ -24,8 +30,11 @@ func.func @test_permlane16_i32_optional_attr(%arg0 : i32) -> i32 {
// CHECK-SAME: (%[[ARG0:.*]]: i32)
func.func @test_permlane32_i32(%arg0 : i32) -> i32 {
// CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[ARG0]], %[[ARG0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: return %[[RES]] : i32
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: return %[[SEL]] : i32
%0 = amdgpu.permlane_swap %arg0 32 : i32
return %0 : i32
}
@@ -35,8 +44,11 @@ func.func @test_permlane32_i32(%arg0 : i32) -> i32 {
func.func @test_permlane16_f32(%arg0 : f32) -> f32 {
// CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f32 to i32
// CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[CAST]], %[[CAST]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[RES]] : i32 to f32
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[CAST]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[SEL]] : i32 to f32
// CHECK: return %[[RES_CAST]] : f32
%0 = amdgpu.permlane_swap %arg0 16 : f32
return %0 : f32
@@ -47,8 +59,11 @@ func.func @test_permlane16_f32(%arg0 : f32) -> f32 {
func.func @test_permlane32_f32(%arg0 : f32) -> f32 {
// CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f32 to i32
// CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[CAST]], %[[CAST]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[RES]] : i32 to f32
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[CAST]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[SEL]] : i32 to f32
// CHECK: return %[[RES_CAST]] : f32
%0 = amdgpu.permlane_swap %arg0 32 : f32
return %0 : f32
@@ -60,8 +75,11 @@ func.func @test_permlane16_f16(%arg0 : f16) -> f16 {
// CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f16 to i16
// CHECK: %[[ZEXT:.*]] = llvm.zext %[[CAST]] : i16 to i32
// CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ZEXT]], %[[ZEXT]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[RES]] : i32 to i16
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ZEXT]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SEL]] : i32 to i16
// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[TRUNC]] : i16 to f16
// CHECK: return %[[RES_CAST]] : f16
%0 = amdgpu.permlane_swap %arg0 16 : f16
@@ -74,8 +92,11 @@ func.func @test_permlane32_f16(%arg0 : f16) -> f16 {
// CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f16 to i16
// CHECK: %[[ZEXT:.*]] = llvm.zext %[[CAST]] : i16 to i32
// CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[ZEXT]], %[[ZEXT]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[RES]] : i32 to i16
+// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ZEXT]] : i32
+// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32
+// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SEL]] : i32 to i16
// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[TRUNC]] : i16 to f16
// CHECK: return %[[RES_CAST]] : f16
%0 = amdgpu.permlane_swap %arg0 32 : f16
@@ -90,10 +111,16 @@ func.func @test_permlane16_2xi32(%arg0 : vector<2xi32>) -> vector<2xi32> {
// CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[ARG0]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[ARG0]][%[[C1]] : i32] : vector<2xi32>
-// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T0:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
+// CHECK: %[[T0_0:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T0_1:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[T0_0]], %[[ELEM0]] : i32
+// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[T0_1]], %[[T0_0]] : i1, i32
+// CHECK: %[[T1:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
+// CHECK: %[[T1_0:.*]] = llvm.extractvalue %[[T1]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T1_1:.*]] = llvm.extractvalue %[[T1]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[T1_0]], %[[ELEM1]] : i32
+// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[T1_1]], %[[T1_0]] : i1, i32
// CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32>
// CHECK: return %[[VEC_INSERT1]] : vector<2xi32>
@@ -109,10 +136,16 @@ func.func @test_permlane32_2xi32(%arg0 : vector<2xi32>) -> vector<2xi32> {
// CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[ARG0]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[ARG0]][%[[C1]] : i32] : vector<2xi32>
-// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
-// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T0:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
+// CHECK: %[[T0_0:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T0_1:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[T0_0]], %[[ELEM0]] : i32
+// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[T0_1]], %[[T0_0]] : i1, i32
+// CHECK: %[[T1:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
+// CHECK: %[[T1_0:.*]] = llvm.extractvalue %[[T1]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[T1_1:.*]] = llvm.extractvalue %[[T1]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[T1_0]], %[[ELEM1]] : i32
+// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[T1_1]], %[[T1_0]] : i1, i32
// CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32>
// CHECK: return %[[VEC_INSERT1]] : vector<2xi32>
@@ -130,9 +163,15 @@ func.func @test_permlane16_4xf16(%arg0 : vector<4xf16>) -> vector<4xf16> {
// CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[CAST1]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[CAST1]][%[[C1]] : i32] : vector<2xi32>
// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM0_E0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM0_E1:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[PERM0_E0]], %[[ELEM0]] : i32
+// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[PERM0_E1]], %[[PERM0_E0]] : i1, i32
// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM1_E0:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM1_E1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[PERM1_E0]], %[[ELEM1]] : i32
+// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[PERM1_E1]], %[[PERM1_E0]] : i1, i32
// CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32>
// CHECK: %[[CAST2:.*]] = llvm.bitcast %[[VEC_INSERT1]] : vector<2xi32> to vector<4xf16>
@@ -151,9 +190,15 @@ func.func @test_permlane32_4xf16(%arg0 : vector<4xf16>) -> vector<4xf16> {
// CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[CAST1]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[CAST1]][%[[C1]] : i32] : vector<2xi32>
// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM0_E0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM0_E1:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[PERM0_E0]], %[[ELEM0]] : i32
+// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[PERM0_E1]], %[[PERM0_E0]] : i1, i32
// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)>
-// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM1_E0:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)>
+// CHECK: %[[PERM1_E1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][1] : !llvm.struct<(i32, i32)>
+// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[PERM1_E0]], %[[ELEM1]] : i32
+// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[PERM1_E1]], %[[PERM1_E0]] : i1, i32
// CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32>
// CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32>
// CHECK: %[[CAST2:.*]] = llvm.bitcast %[[VEC_INSERT1]] : vector<2xi32> to vector<4xf16>
diff --git a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir
index 6a4d77e..fd89102 100644
--- a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir
+++ b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir
@@ -586,9 +586,10 @@ func.func @arm_sme_extract_tile_slice_ver_i128(%tile_slice_index : index) -> vec
// -----
// CHECK-LABEL: @arm_sme_streaming_vl_bytes
-// CHECK: %[[COUNT:.*]] = "arm_sme.intr.cntsb"() : () -> i64
-// CHECK: %[[INDEX_COUNT:.*]] = arith.index_cast %[[COUNT]] : i64 to index
-// CHECK: return %[[INDEX_COUNT]] : index
+// CHECK: %[[CONST:.*]] = arith.constant 8 : index
+// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64
+// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index
+// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index
func.func @arm_sme_streaming_vl_bytes() -> index {
%svl_b = arm_sme.streaming_vl <byte>
return %svl_b : index
@@ -597,7 +598,10 @@ func.func @arm_sme_streaming_vl_bytes() -> index {
// -----
// CHECK-LABEL: @arm_sme_streaming_vl_half_words
-// CHECK: "arm_sme.intr.cntsh"() : () -> i64
+// CHECK: %[[CONST:.*]] = arith.constant 4 : index
+// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64
+// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index
+// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index
func.func @arm_sme_streaming_vl_half_words() -> index {
%svl_h = arm_sme.streaming_vl <half>
return %svl_h : index
@@ -606,7 +610,10 @@ func.func @arm_sme_streaming_vl_half_words() -> index {
// -----
// CHECK-LABEL: @arm_sme_streaming_vl_words
-// CHECK: "arm_sme.intr.cntsw"() : () -> i64
+// CHECK: %[[CONST:.*]] = arith.constant 2 : index
+// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64
+// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index
+// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index
func.func @arm_sme_streaming_vl_words() -> index {
%svl_w = arm_sme.streaming_vl <word>
return %svl_w : index
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index c6261b37..ef631ce 100644..100755
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -749,13 +749,19 @@ gpu.module @test_module {
%shfl1, %pred1 = gpu.shuffle xor %arg0, %arg1, %arg4 : f32
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
// CHECK: %[[#PERMUTE:]] = rocdl.permlane16.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)>
- // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)>
- // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32
+ // CHECK: %[[#EXTRACT0:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)>
+ // CHECK: %[[#EXTRACT1:]] = llvm.extractvalue %[[#PERMUTE:]][1] : !llvm.struct<(i32, i32)>
+ // CHECK: %[[#CMP:]] = llvm.icmp "eq" %[[#EXTRACT0]], %[[#CAST_VALUE]] : i32
+ // CHECK: %[[#SEL:]] = llvm.select %[[#CMP]], %[[#EXTRACT1]], %[[#EXTRACT0]] : i1, i32
+ // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#SEL]] : i32 to f32
%shfl2, %pred2 = gpu.shuffle xor %arg0, %arg2, %arg4 : f32
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
// CHECK: %[[#PERMUTE:]] = rocdl.permlane32.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)>
- // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)>
- // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32
+ // CHECK: %[[#EXTRACT0:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)>
+ // CHECK: %[[#EXTRACT1:]] = llvm.extractvalue %[[#PERMUTE:]][1] : !llvm.struct<(i32, i32)>
+ // CHECK: %[[#CMP:]] = llvm.icmp "eq" %[[#EXTRACT0]], %[[#CAST_VALUE]] : i32
+ // CHECK: %[[#SEL:]] = llvm.select %[[#CMP]], %[[#EXTRACT1]], %[[#EXTRACT0]] : i1, i32
+ // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#SEL]] : i32 to f32
%shfl3, %pred3 = gpu.shuffle xor %arg0, %arg3, %arg4 : f32
func.return %shfl1, %shfl2, %shfl3 : f32, f32, f32
}
diff --git a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir
index 3573114..ac5ca32 100644
--- a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir
+++ b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir
@@ -194,3 +194,53 @@ func.func @emptyCast() -> index {
%0 = builtin.unrealized_conversion_cast to index
return %0 : index
}
+
+// -----
+
+// CHECK-LABEL: test.graph_region
+// CHECK-NEXT: "test.return"() : () -> ()
+test.graph_region {
+ %0 = builtin.unrealized_conversion_cast %2 : i32 to i64
+ %1 = builtin.unrealized_conversion_cast %0 : i64 to i16
+ %2 = builtin.unrealized_conversion_cast %1 : i16 to i32
+ "test.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: test.graph_region
+// CHECK-NEXT: %[[cast0:.*]] = builtin.unrealized_conversion_cast %[[cast2:.*]] : i32 to i64
+// CHECK-NEXT: %[[cast1:.*]] = builtin.unrealized_conversion_cast %[[cast0]] : i64 to i16
+// CHECK-NEXT: %[[cast2]] = builtin.unrealized_conversion_cast %[[cast1]] : i16 to i32
+// CHECK-NEXT: "test.user"(%[[cast2]]) : (i32) -> ()
+// CHECK-NEXT: "test.return"() : () -> ()
+test.graph_region {
+ %0 = builtin.unrealized_conversion_cast %2 : i32 to i64
+ %1 = builtin.unrealized_conversion_cast %0 : i64 to i16
+ %2 = builtin.unrealized_conversion_cast %1 : i16 to i32
+ "test.user"(%2) : (i32) -> ()
+ "test.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: test.graph_region
+// CHECK-NEXT: "test.return"() : () -> ()
+test.graph_region {
+ %0 = builtin.unrealized_conversion_cast %0 : i32 to i32
+ "test.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: test.graph_region
+// CHECK-NEXT: %[[c0:.*]] = arith.constant
+// CHECK-NEXT: %[[cast:.*]]:2 = builtin.unrealized_conversion_cast %[[c0]], %[[cast]]#1 : i32, i32 to i32, i32
+// CHECK-NEXT: "test.user"(%[[cast]]#0) : (i32) -> ()
+// CHECK-NEXT: "test.return"() : () -> ()
+test.graph_region {
+ %cst = arith.constant 0 : i32
+ %0, %1 = builtin.unrealized_conversion_cast %cst, %1 : i32, i32 to i32, i32
+ "test.user"(%0) : (i32) -> ()
+ "test.return"() : () -> ()
+}
diff --git a/mlir/test/Dialect/Arith/int-range-opts.mlir b/mlir/test/Dialect/Arith/int-range-opts.mlir
index ea5969a..e6e48d3 100644
--- a/mlir/test/Dialect/Arith/int-range-opts.mlir
+++ b/mlir/test/Dialect/Arith/int-range-opts.mlir
@@ -132,3 +132,19 @@ func.func @wraps() -> i8 {
%mod = arith.remsi %val, %c64 : i8
return %mod : i8
}
+
+// -----
+
+// CHECK-LABEL: @analysis_crash
+func.func @analysis_crash(%arg0: i32, %arg1: tensor<128xi1>) -> tensor<128xi64> {
+ %c0_i32 = arith.constant 0 : i32
+ %cst = arith.constant dense<-1> : tensor<128xi32>
+ %splat = tensor.splat %arg0 : tensor<128xi32>
+ %0 = scf.for %arg2 = %c0_i32 to %arg0 step %arg0 iter_args(%arg3 = %splat) -> (tensor<128xi32>) : i32 {
+ scf.yield %arg3 : tensor<128xi32>
+ }
+ %1 = arith.select %arg1, %0#0, %cst : tensor<128xi1>, tensor<128xi32>
+ // Make sure the analysis doesn't crash when materializing the range as a tensor constant.
+ %2 = arith.extsi %1 : tensor<128xi32> to tensor<128xi64>
+ return %2 : tensor<128xi64>
+}
diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
index fb16e1e..a5f8d63 100644
--- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
+++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
@@ -1577,3 +1577,33 @@ func.func @push_extract_through_generic_rank0_operand(%arg0: tensor<128x128xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: %[[EXTRACT:.+]] = tensor.extract_slice %[[GENERIC]]
// CHECK: return %[[EXTRACT]]
+
+// -----
+// Test that if one extract doesn't pass the control function, which in this case is set to
+// only allow extracts from the same block, then an extract from a later operand can still be pushed
+// down.
+func.func @push_extract_through_generic_secondextract(%arg0: tensor<128x128xf32>, %arg1: tensor<?x?xbf16>, %arg2: index) -> tensor<?x?xbf16> {
+ %c0 = arith.constant 0 : index
+ %c32 = arith.constant 32 : index
+ %extracted_slice1 = tensor.extract_slice %arg0[%arg2, %arg2] [%arg2, %arg2] [1, 1] : tensor<128x128xf32> to tensor<?x?xf32>
+ %for = scf.for %arg3 = %c0 to %c32 step %arg2 iter_args(%arg4 = %arg1) -> tensor<?x?xbf16> {
+ %extracted_slice = tensor.extract_slice %arg0[%arg2, %arg2] [%arg2, %arg2] [1, 1] : tensor<128x128xf32> to tensor<?x?xf32>
+ %0 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0, d1)> ,affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice1, %extracted_slice : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg1 : tensor<?x?xbf16>) {
+ ^bb0(%in: f32, %in_1 : f32, %out: bf16):
+ %1 = arith.truncf %in : f32 to bf16
+ linalg.yield %1 : bf16
+ } -> tensor<?x?xbf16>
+ scf.yield %0 : tensor<?x?xbf16>
+ }
+ return %for : tensor<?x?xbf16>
+}
+
+// CHECK-LABEL: func.func @push_extract_through_generic_secondextract
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK: %[[FOR:.+]] = scf.for
+// CHECK: %[[PAD:.+]] = tensor.pad %[[EXTRACT]]
+// CHECK: %[[GENERIC:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[PAD]], %[[ARG0]]
+// CHECK: %[[EXTRACT2:.+]] = tensor.extract_slice %[[GENERIC]]
+// CHECK: scf.yield %[[EXTRACT2]]
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index 5f42938..9005110 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -915,7 +915,7 @@ func.func @sparse_case(%arg0: tensor<8x8xf32, #CSR>, %arg1: tensor<8xf32>) -> te
// -----
-func.func @reduce_dispatch_0() -> tensor<4x2xf32> {
+func.func @parallel_insert_slice() -> tensor<4x2xf32> {
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
@@ -923,6 +923,7 @@ func.func @reduce_dispatch_0() -> tensor<4x2xf32> {
%res = scf.forall (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) {
%1 = tensor.empty() : tensor<1x1xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
+ // CHECK: scf.forall.in_parallel
scf.forall.in_parallel {
// CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}}
// CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor<f32> into tensor<4x2xf32>
diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir
index bb79580..37fc86b 100644
--- a/mlir/test/Dialect/SCF/invalid.mlir
+++ b/mlir/test/Dialect/SCF/invalid.mlir
@@ -645,7 +645,7 @@ func.func @wrong_terminator_op(%in: tensor<100xf32>, %out: tensor<100xf32>) {
%result = scf.forall (%thread_idx) in (%num_threads) shared_outs(%o = %out) -> (tensor<100xf32>) {
%1 = tensor.extract_slice %in[%thread_idx][1][1] : tensor<100xf32> to tensor<1xf32>
- // expected-error @+1 {{expected only tensor.parallel_insert_slice ops}}
+ // expected-error @+1 {{expected only ParallelCombiningOpInterface ops}}
scf.forall.in_parallel {
tensor.parallel_insert_slice %1 into %o[%thread_idx][1][1] :
tensor<1xf32> into tensor<100xf32>
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
index 9bb87ff..ed36855 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -908,3 +908,111 @@ func.func @parallel_region_no_read()
}
return
}
+
+// -----
+
+// CHECK-LABEL: func @in_order_multiple_parallel_writes
+func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32> {bufferization.writable = true},
+ %3: tensor<320xf32> {bufferization.writable = true})
+ -> (tensor<320xf32>, tensor<320xf32>)
+{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant -0.000000e+00 : f32
+ %c320 = arith.constant 320 : index
+ %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) {
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
+ %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
+ %7 = tensor.extract_slice %arg2[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
+ %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32>
+
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ scf.forall.in_parallel {
+ tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ tensor.parallel_insert_slice %8 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ }
+ }
+ return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @out_of_order_parallel_write
+func.func @out_of_order_parallel_write(%2: tensor<320xf32> {bufferization.writable = true},
+ %3: tensor<320xf32> {bufferization.writable = true})
+ -> (tensor<320xf32>, tensor<320xf32>)
+{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant -0.000000e+00 : f32
+ %c320 = arith.constant 320 : index
+ %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) {
+ // The extract_slice cannot operate in place because it is used after the
+ // first write.
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
+ %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+
+ // Additionally the fill aliases the thread local slice.
+ // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+ %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32>
+
+ scf.forall.in_parallel {
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ tensor.parallel_insert_slice %7 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ }
+ }
+ return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @out_of_order_parallel_write_multiple_reads
+func.func @out_of_order_parallel_write_multiple_reads(%2: tensor<320xf32> {bufferization.writable = true},
+ %3: tensor<320xf32> {bufferization.writable = true})
+ -> (tensor<320xf32>, tensor<320xf32>)
+{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant -0.000000e+00 : f32
+ %c320 = arith.constant 320 : index
+ %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) {
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["false", "none"]}
+ %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
+ %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32>
+
+ %reverse = arith.subi %c320, %arg0 : index
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
+ %8 = tensor.extract_slice %arg1[%reverse] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ scf.forall.in_parallel {
+ // Also cannot operate in place due to subsequent conflicting reads.
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ tensor.parallel_insert_slice %7 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ tensor.parallel_insert_slice %8 into %arg2[%reverse] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ }
+ }
+ return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32>
+}
+// -----
+
+// CHECK-LABEL: func @in_order_multiple_parallel_writes
+func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32> {bufferization.writable = true})
+ -> (tensor<320xf32>)
+{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant -0.000000e+00 : f32
+ %c320 = arith.constant 320 : index
+ %4 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2) -> (tensor<320xf32>) {
+ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
+ %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ %reverse = arith.subi %c320, %arg0 : index
+ // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
+ scf.forall.in_parallel {
+ tensor.parallel_insert_slice %6 into %arg1[%reverse] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ }
+ }
+ return %4 : tensor<320xf32>
+}
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
index 8f4b924..92486b8 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
@@ -112,7 +112,7 @@ func.func @scf_while_non_equiv_condition_and_body(%A: tensor<5xi1>,
// CHECK-SAME: %[[arg0:.*]]: tensor<100xf32>, %[[arg1:.*]]: tensor<100xf32>
// CHECK-FUNC-LABEL: func @scf_forall_out_of_place(
func.func @scf_forall_out_of_place(%in: tensor<100xf32>,
- %out: tensor<100xf32>) {
+ %out: tensor<100xf32>) {
%c1 = arith.constant 1 : index
%num_threads = arith.constant 100 : index
@@ -132,3 +132,31 @@ func.func @scf_forall_out_of_place(%in: tensor<100xf32>,
} {mapping = [#gpu.thread<x>]}
return
}
+
+// -----
+
+// CHECK-LABEL: func @in_order_multiple_parallel_writes
+func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32>,
+ %3: tensor<320xf32>)
+ -> (tensor<320xf32>, tensor<320xf32>)
+{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant -0.000000e+00 : f32
+ %c320 = arith.constant 320 : index
+ %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) {
+ // CHECK: tensor.extract_slice {{.*}}
+ %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ // CHECK: tensor.extract_slice {{.*}}
+ %7 = tensor.extract_slice %arg2[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
+ // CHECK: linalg.fill {{.*}}
+ %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32>
+
+ // CHECK: tensor.parallel_insert_slice {{.*}}
+ // CHECK: tensor.parallel_insert_slice {{.*}}
+ scf.forall.in_parallel {
+ tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ tensor.parallel_insert_slice %8 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
+ }
+ }
+ return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32>
+}
diff --git a/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir b/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir
index 10fbcf0..63dea6a 100644
--- a/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir
@@ -101,6 +101,14 @@ func.func @interface_var(
// -----
+// CHECK: {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}
+// CHECK: {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>}
+spirv.ARM.Graph @interface_var(%arg: !spirv.arm.tensor<1xf32> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}) -> (
+ !spirv.arm.tensor<1xf32> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>}
+) { spirv.ARM.GraphOutputs %arg : !spirv.arm.tensor<1xf32> }
+
+// -----
+
//===----------------------------------------------------------------------===//
// spirv.resource_limits
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir
index f3a3218..04667c8 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir
@@ -35,6 +35,28 @@ spirv.module Logical GLSL450 {
// -----
+module attributes {
+ spirv.target_env = #spirv.target_env<
+ #spirv.vce<v1.0, [VulkanMemoryModel, Shader, Int8, TensorsARM, GraphARM], [SPV_ARM_tensors, SPV_ARM_graph, SPV_KHR_vulkan_memory_model]>, #spirv.resource_limits<>>
+} {
+
+// CHECK-LABEL: spirv.module
+spirv.module Logical Vulkan {
+ // CHECK-DAG: spirv.GlobalVariable [[VARARG0:@.*]] bind(0, 0) : !spirv.ptr<!spirv.arm.tensor<1x16x16x16xi8>, UniformConstant>
+ // CHECK-DAG: spirv.GlobalVariable [[VARRES0:@.*]] bind(0, 1) : !spirv.ptr<!spirv.arm.tensor<1x16x16x16xi8>, UniformConstant>
+
+ // CHECK: spirv.ARM.GraphEntryPoint [[GN:@.*]], [[VARARG0]], [[VARRES0]]
+ // CHECK: spirv.ARM.Graph [[GN]]([[ARG0:%.*]]: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> attributes {entry_point = true}
+ spirv.ARM.Graph @main(%arg0: !spirv.arm.tensor<1x16x16x16xi8> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>})
+ -> (!spirv.arm.tensor<1x16x16x16xi8> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>}) attributes {entry_point = true} {
+ spirv.ARM.GraphOutputs %arg0 : !spirv.arm.tensor<1x16x16x16xi8>
+ }
+} // end spirv.module
+
+} // end module
+
+// -----
+
module {
// expected-error@+1 {{'spirv.module' op missing SPIR-V target env attribute}}
spirv.module Logical GLSL450 {}
diff --git a/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir
new file mode 100644
index 0000000..5bffa20
--- /dev/null
+++ b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir
@@ -0,0 +1,11 @@
+module @transforms attributes { transform.with_named_sequence } {
+ transform.named_sequence @drop_unit_dims(%module: !transform.any_op {transform.readonly}) {
+
+ %func_op = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.vector.drop_inner_most_unit_dims_from_xfer_ops
+ } : !transform.op<"func.func">
+
+ transform.yield
+ }
+}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index cd56c1b..18c2879 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,4 +1,6 @@
-// RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
+// RUN: mlir-opt -split-input-file \
+// RUN: -transform-preload-library='transform-library-paths=%p/td/xfer-drop-unit-dims.mlir' \
+// RUN: -transform-interpreter=entry-point=drop_unit_dims %s | FileCheck %s
//-----------------------------------------------------------------------------
// 1. vector.transfer_read
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 60acea0..30ca981 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -1,5 +1,8 @@
// RUN: mlir-opt -xegpu-subgroup-distribute -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -xegpu-subgroup-distribute="enable-sg-reductions=false" -allow-unregistered-dialect \
+// RUN: -canonicalize -cse -split-input-file %s | FileCheck %s --check-prefix=CHECK-REDUCTION
+
// CHECK-LABEL: gpu.func @store_nd_1d
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) {
// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32>
@@ -321,6 +324,116 @@ gpu.module @test {
}
// -----
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] ->
+// CHECK-SAME: (!xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x2xf32>) {
+// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<16x32xf32>
+// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x32xf32>
+// CHECK-NEXT: }
+// CHECK: %[[COL0:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32>
+// CHECK-NEXT: %[[RED0:.*]] = vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32
+// CHECK: %[[COL1:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32>
+// CHECK-NEXT: %[[RED1:.*]] = vector.reduction <add>, %[[CAST1]], %{{.*}} : vector<16xf32> into f32
+// CHECK-NEXT: vector.from_elements %[[RED0]], %[[RED1]] : vector<2xf32>
+gpu.module @test {
+gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction() {
+ %0 = "some_def"() : () -> !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<16x32xf32>)
+ %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<32xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} [0]
+ : vector<16x32xf32> to vector<32xf32>
+ %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<32xf32> to vector<1x32xf32>
+ xegpu.store_nd %3, %0 : vector<1x32xf32>, !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ gpu.return
+}
+}
+
+// -----
+// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction
+// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<2x16xf32,
+// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32) {
+// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<2x16xf32>
+// CHECK-REDUCTION-NEXT: %[[ROW0:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32>
+// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32
+// CHECK-REDUCTION-NEXT: %[[ROW1:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32>
+// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32
+// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32
+// CHECK-REDUCTION-NEXT: }
+// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32>
+gpu.module @test {
+gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction() {
+ %0 = "some_def"() : () -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<2x16xf32>)
+ %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<2xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>}
+ [1] : vector<2x16xf32> to vector<2xf32>
+ %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<2xf32> to vector<2x1xf32>
+ %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<2x1xf32> to vector<2x16xf32>
+ xegpu.store_nd %4, %0 : vector<2x16xf32>, !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ gpu.return
+}
+}
+
+// -----
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%0)[16] ->
+// CHECK-SAME: (!xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<2x16xf32>) {
+// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> vector<32x16xf32>
+// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<32x16xf32>
+// CHECK-NEXT: }
+// CHECK: %[[ROW0:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32>
+// CHECK-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32
+// CHECK: %[[ROW1:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32>
+// CHECK-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32
+// CHECK-NEXT: vector.from_elements %[[R0]], %[[R1]] : vector<2xf32>
+gpu.module @test {
+gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction() {
+ %0 = "some_def"() : () -> !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
+ %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<32x16xf32>)
+ %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<32xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} [1]
+ : vector<32x16xf32> to vector<32xf32>
+ %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ : vector<32xf32> to vector<32x1xf32>
+ xegpu.store_nd %3, %0 : vector<32x1xf32>, !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
+ gpu.return
+}
+}
+
+// -----
+// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction
+// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<16x2xf32,
+// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32) {
+// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> vector<16x2xf32>
+// CHECK-REDUCTION-NEXT: %[[COL0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK-REDUCTION-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32>
+// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32
+// CHECK-REDUCTION-NEXT: %[[COL1:.*]] = vector.extract_strided_slice %5 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK-REDUCTION-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32>
+// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[CAST1]], %cst : vector<16xf32> into f32
+// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32
+// CHECK-REDUCTION-NEXT: }
+// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32>
+gpu.module @test {
+gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction() {
+ %0 = "some_def"() : () -> !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
+ %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<16x2xf32>)
+ %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<2xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>}
+ [0] : vector<16x2xf32> to vector<2xf32>
+ %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ : vector<2xf32> to vector<1x2xf32>
+ %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : vector<1x2xf32> to vector<16x2xf32>
+ xegpu.store_nd %4, %0 : vector<16x2xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
+ gpu.return
+}
+}
+
+// -----
// CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) {
// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
// CHECK-NEXT: %[[LANE_OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
index afb2bf8..3478a9b 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
@@ -2,6 +2,7 @@
//CHECK: #map = affine_map<()[s0] -> (s0 floordiv 4)>
//CHECK: #map1 = affine_map<()[s0] -> (s0 mod 4)>
+//CHECK: #map2 = affine_map<()[s0] -> (s0 floordiv 8)>
gpu.module @test_distribution {
// CHECK-LABEL: create_nd_tdesc_no_offset
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
@@ -365,4 +366,62 @@ gpu.module @test_distribution {
xegpu.store_matrix %cst, %mdesc[0, 0] {layout = #xegpu.layout<sg_layout = [2, 4], sg_data = [32, 32]>} : vector<64x128xf32>, !xegpu.mem_desc<64x128xf32>
gpu.return
}
+
+ // CHECK-LABEL: vector_step_op
+ gpu.func @vector_step_op_slice_attr() {
+ //CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
+ //CHECK-DAG: [[IDY:%.+]] = affine.apply #map2()[[[sgId]]]
+ //CHECK-DAG: [[c32:%.+]] = arith.constant 32 : index
+ //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
+ //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index
+ //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
+ //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index
+ //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
+ //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<32xindex>
+ //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
+ //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
+ %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>, dims = [1]>}: vector<128xindex>
+ gpu.return
+ }
+
+ gpu.func @vector_step_op_layout_attr() {
+ //CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
+ //CHECK-DAG: [[c16:%.+]] = arith.constant 16 : index
+ //CHECK-DAG: [[c8:%.+]] = arith.constant 8 : index
+ //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[sgId]], [[c8]]
+ //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index
+ //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
+ //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index
+ //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
+ //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<8xindex>
+ //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<8xindex>
+ //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<8xindex>
+ %step = vector.step {layout_result_0 = #xegpu.layout<sg_layout = [16], sg_data = [8]>}: vector<128xindex>
+ gpu.return
+ }
+
+ // CHECK-LABEL: constant_with_slice_attr
+ gpu.func @constant_with_slice_attr() {
+ //CHECK: [[cst:%.+]] = arith.constant dense<10> : vector<1xindex>
+ %cst = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [4, 2, 6, 1], sg_data = [1, 1, 1, 1]>, dims = [1, 2, 3]>} dense<10> : vector<4xindex>
+ gpu.return
+ }
+
+ // CHECK-LABEL: vector_shape_cast
+ gpu.func @vector_shape_cast() {
+ %cst = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} dense<10> : vector<128xindex>
+ %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} : vector<128xindex>
+ %muli = arith.muli %cst, %step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} : vector<128xindex>
+ //CHECK: vector.shape_cast {{.*}} : vector<32xindex> to vector<1x1x1x32xindex>
+ %shape_cast = vector.shape_cast %muli {layout_result_0 = #xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>} : vector<128xindex> to vector<1x1x1x128xindex>
+ gpu.return
+ }
+
+ // CHECK-LABEL: vector_broadcast
+ gpu.func @vector_broadcast(%arg0: index, %arg1: index) {
+ %muli = arith.muli %arg0, %arg1 : index
+ // CHECK: vector.broadcast {{.*}} : index to vector<1x1x1x32xindex>
+ %broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout<sg_layout = [4, 2, 6, 1], sg_data = [1, 1, 1, 32]>} : index to vector<4x2x6x32xindex>
+ gpu.return
+ }
}
diff --git a/mlir/test/Examples/standalone/lit.local.cfg b/mlir/test/Examples/standalone/lit.local.cfg
index fe8397c..3b12dcbd 100644
--- a/mlir/test/Examples/standalone/lit.local.cfg
+++ b/mlir/test/Examples/standalone/lit.local.cfg
@@ -10,3 +10,4 @@ config.substitutions.append(("%host_cc", config.host_cc))
config.substitutions.append(("%enable_libcxx", config.enable_libcxx))
config.substitutions.append(("%mlir_cmake_dir", config.mlir_cmake_dir))
config.substitutions.append(("%llvm_use_linker", config.llvm_use_linker))
+config.substitutions.append(("%cmake_build_type", config.cmake_build_type))
diff --git a/mlir/test/Examples/standalone/test.toy b/mlir/test/Examples/standalone/test.toy
index e99bab5..8b6d9dd 100644
--- a/mlir/test/Examples/standalone/test.toy
+++ b/mlir/test/Examples/standalone/test.toy
@@ -1,4 +1,5 @@
# RUN: "%cmake_exe" "%mlir_src_root/examples/standalone" -G "%cmake_generator" \
+# RUN: -DCMAKE_BUILD_TYPE=%cmake_build_type \
# RUN: -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc \
# RUN: -DLLVM_ENABLE_LIBCXX=%enable_libcxx -DMLIR_DIR=%mlir_cmake_dir \
# RUN: -DLLVM_USE_LINKER=%llvm_use_linker \
@@ -11,4 +12,4 @@
# if any fail.
# CHECK: Passed
# CHECK-NOT: Failed
-# UNSUPPORTED: target={{.*(windows|android).*}}
+# UNSUPPORTED: target={{.*(android).*}}
diff --git a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
index 25a338d..01a826a 100644
--- a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
@@ -1,7 +1,8 @@
// RUN: mlir-opt %s -generate-runtime-verification \
// RUN: -expand-strided-metadata \
// RUN: -test-cf-assert \
-// RUN: -convert-to-llvm | \
+// RUN: -convert-to-llvm \
+// RUN: -reconcile-unrealized-casts | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils 2>&1 | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
index 4c6a48d..1144a7c 100644
--- a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
@@ -1,6 +1,7 @@
// RUN: mlir-opt %s -generate-runtime-verification \
// RUN: -test-cf-assert \
-// RUN: -convert-to-llvm | \
+// RUN: -convert-to-llvm \
+// RUN: -reconcile-unrealized-casts | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils 2>&1 | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
index dd000c6..82e6380 100644
--- a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
@@ -1,6 +1,7 @@
// RUN: mlir-opt %s -generate-runtime-verification \
// RUN: -test-cf-assert \
-// RUN: -convert-to-llvm | \
+// RUN: -convert-to-llvm \
+// RUN: -reconcile-unrealized-casts | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils 2>&1 | \
// RUN: FileCheck %s
diff --git a/mlir/test/Pass/remarks.mlir b/mlir/test/Pass/remarks.mlir
new file mode 100644
index 0000000..8aa04e3
--- /dev/null
+++ b/mlir/test/Pass/remarks.mlir
@@ -0,0 +1,28 @@
+// RUN: mlir-opt %s --test-remark --remarks-filter-passed="category-1-passed" 2>&1 | FileCheck %s -check-prefix=CHECK-PASSED
+// RUN: mlir-opt %s --test-remark --remarks-filter-missed="a-category-1-missed" 2>&1 | FileCheck %s -check-prefix=CHECK-MISSED
+// RUN: mlir-opt %s --test-remark --remarks-filter-failed="category-2-failed" 2>&1 | FileCheck %s -check-prefix=CHECK-FAILED
+// RUN: mlir-opt %s --test-remark --remarks-filter-analyse="category-2-analysis" 2>&1 | FileCheck %s -check-prefix=CHECK-ANALYSIS
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" 2>&1 | FileCheck %s -check-prefix=CHECK-ALL
+// RUN: mlir-opt %s --test-remark --remarks-filter="category-1.*" 2>&1 | FileCheck %s -check-prefix=CHECK-ALL1
+module @foo {
+ "test.op"() : () -> ()
+
+}
+
+
+// CHECK-PASSED: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-MISSED:remarks.mlir:8:3: remark: [Missed] test-remark | Category:a-category-1-missed | Reason="because we are testing the remark pipeline", Remark="This is a test missed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-FAILED: remarks.mlir:8:3: remark: [Failure] test-remark | Category:category-2-failed | Reason="because we are testing the remark pipeline", Remark="This is a test failed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-ANALYSIS: remarks.mlir:8:3: remark: [Analysis] test-remark | Category:category-2-analysis | Remark="This is a test analysis remark"
+
+
+// CHECK-ALL: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-ALL: remarks.mlir:8:3: remark: [Failure] test-remark | Category:category-2-failed | Reason="because we are testing the remark pipeline", Remark="This is a test failed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-ALL: remarks.mlir:8:3: remark: [Analysis] test-remark | Category:category-2-analysis | Remark="This is a test analysis remark"
+
+// CHECK-ALL1: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature"
+// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Missed]
+// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Failure]
+// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Analysis]
+
+
diff --git a/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir b/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir
index 14821da..6f5b1d8 100644
--- a/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir
@@ -36,6 +36,6 @@ llvm.func @arm_sme_tile_slice_to_vector_invalid_element_types(
llvm.func @arm_sme_streaming_vl_invalid_return_type() -> i32 {
// expected-error @+1 {{failed to verify that `res` is i64}}
- %res = "arm_sme.intr.cntsb"() : () -> i32
+ %res = "arm_sme.intr.cntsd"() : () -> i32
llvm.return %res : i32
}
diff --git a/mlir/test/Target/LLVMIR/arm-sme.mlir b/mlir/test/Target/LLVMIR/arm-sme.mlir
index aedb673..0a13a75 100644
--- a/mlir/test/Target/LLVMIR/arm-sme.mlir
+++ b/mlir/test/Target/LLVMIR/arm-sme.mlir
@@ -419,12 +419,6 @@ llvm.func @arm_sme_tile_slice_to_vector_vert(%tileslice : i32,
// -----
llvm.func @arm_sme_streaming_vl() {
- // CHECK: call i64 @llvm.aarch64.sme.cntsb()
- %svl_b = "arm_sme.intr.cntsb"() : () -> i64
- // CHECK: call i64 @llvm.aarch64.sme.cntsh()
- %svl_h = "arm_sme.intr.cntsh"() : () -> i64
- // CHECK: call i64 @llvm.aarch64.sme.cntsw()
- %svl_w = "arm_sme.intr.cntsw"() : () -> i64
// CHECK: call i64 @llvm.aarch64.sme.cntsd()
%svl_d = "arm_sme.intr.cntsd"() : () -> i64
llvm.return
diff --git a/mlir/test/Target/SPIRV/graph-ops.mlir b/mlir/test/Target/SPIRV/graph-ops.mlir
new file mode 100644
index 0000000..c956157
--- /dev/null
+++ b/mlir/test/Target/SPIRV/graph-ops.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip %s | FileCheck %s
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv %s | spirv-val %}
+
+// CHECK: spirv.module Logical Vulkan requires #spirv.vce<v1.3, [VulkanMemoryModel, Shader, Int8, Int16, TensorsARM, GraphARM], [SPV_ARM_tensors, SPV_ARM_graph, SPV_KHR_vulkan_memory_model]> {
+spirv.module Logical Vulkan requires #spirv.vce<v1.3, [VulkanMemoryModel, Shader, Int8, Int16, TensorsARM, GraphARM], [SPV_ARM_tensors, SPV_ARM_graph, SPV_KHR_vulkan_memory_model]> {
+ // CHECK: spirv.GlobalVariable [[VARARG0:@.*]] bind(0, 0) : !spirv.ptr<!spirv.arm.tensor<14x19xi16>, UniformConstant>
+ spirv.GlobalVariable @main_arg_0 bind(0, 0) : !spirv.ptr<!spirv.arm.tensor<14x19xi16>, UniformConstant>
+ // CHECK: spirv.GlobalVariable [[VARRES0:@.*]] bind(0, 1) : !spirv.ptr<!spirv.arm.tensor<2x3xi16>, UniformConstant>
+ spirv.GlobalVariable @main_res_0 bind(0, 1) : !spirv.ptr<!spirv.arm.tensor<2x3xi16>, UniformConstant>
+ // CHECK: spirv.ARM.GraphEntryPoint [[GN:@.*]], [[VARARG0]], [[VARRES0]]
+ spirv.ARM.GraphEntryPoint @main, @main_arg_0, @main_res_0
+ // CHECK: spirv.ARM.Graph [[GN]]({{%.*}}: !spirv.arm.tensor<14x19xi16>) -> !spirv.arm.tensor<2x3xi16> attributes {entry_point = true} {
+ spirv.ARM.Graph @main(%arg0 : !spirv.arm.tensor<14x19xi16>) -> !spirv.arm.tensor<2x3xi16> attributes {entry_point = true} {
+ // CHECK: [[CONST2:%.*]] = spirv.ARM.GraphConstant {graph_constant_id = 42 : i32} : !spirv.arm.tensor<2x3xi16>
+ %0 = spirv.ARM.GraphConstant { graph_constant_id = 42 : i32 } : !spirv.arm.tensor<2x3xi16>
+ // CHECK: spirv.ARM.GraphOutputs [[OUT:%.*]] : !spirv.arm.tensor<2x3xi16>
+ spirv.ARM.GraphOutputs %0 : !spirv.arm.tensor<2x3xi16>
+ }
+
+ // CHECK: spirv.ARM.Graph {{@.*}}({{%.*}}: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> attributes {entry_point = false} {
+ spirv.ARM.Graph @empty_graph(%arg0: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> {
+ // CHECK: spirv.ARM.GraphOutputs {{%.*}} : !spirv.arm.tensor<1x16x16x16xi8>
+ spirv.ARM.GraphOutputs %arg0 : !spirv.arm.tensor<1x16x16x16xi8>
+ }
+}
diff --git a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp
index 2cf25d8..d332270 100644
--- a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp
@@ -34,8 +34,13 @@ struct TestDataLayoutPropagationPass
RewritePatternSet patterns(context);
linalg::populateDataLayoutPropagationPatterns(
patterns, [](OpOperand *opOperand) { return true; });
- linalg::populateExtractSliceSinkingPatterns(
- patterns, [](OpOperand *opOperand) { return true; });
+ linalg::ControlPropagationFn controlExtract =
+ [](OpOperand *opOperand) -> bool {
+ Operation *producer = opOperand->get().getDefiningOp();
+ Operation *consumer = opOperand->getOwner();
+ return consumer->getBlock() == producer->getBlock();
+ };
+ linalg::populateExtractSliceSinkingPatterns(patterns, controlExtract);
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns))))
return signalPassFailure();
}
diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
index d6596cd..c2d18462 100644
--- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
@@ -344,36 +344,6 @@ struct TestVectorTransferOpt
}
};
-struct TestVectorTransferCollapseInnerMostContiguousDims
- : public PassWrapper<TestVectorTransferCollapseInnerMostContiguousDims,
- OperationPass<func::FuncOp>> {
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
- TestVectorTransferCollapseInnerMostContiguousDims)
-
- TestVectorTransferCollapseInnerMostContiguousDims() = default;
- TestVectorTransferCollapseInnerMostContiguousDims(
- const TestVectorTransferCollapseInnerMostContiguousDims &pass) = default;
-
- void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<memref::MemRefDialect, affine::AffineDialect>();
- }
-
- StringRef getArgument() const final {
- return "test-vector-transfer-collapse-inner-most-dims";
- }
-
- StringRef getDescription() const final {
- return "Test lowering patterns that reduces the rank of the vector "
- "transfer memory and vector operands.";
- }
-
- void runOnOperation() override {
- RewritePatternSet patterns(&getContext());
- populateDropInnerMostUnitDimsXferOpPatterns(patterns);
- (void)applyPatternsGreedily(getOperation(), std::move(patterns));
- }
-};
-
struct TestVectorSinkPatterns
: public PassWrapper<TestVectorSinkPatterns, OperationPass<func::FuncOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorSinkPatterns)
@@ -1079,8 +1049,6 @@ void registerTestVectorLowerings() {
PassRegistration<TestVectorTransferOpt>();
- PassRegistration<TestVectorTransferCollapseInnerMostContiguousDims>();
-
PassRegistration<TestVectorSinkPatterns>();
PassRegistration<TestVectorReduceToContractPatternsPatterns>();
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 200323c..e1ba45c 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -170,7 +170,8 @@ class TestStepOpPattern : public OpConversionPattern<vector::StepOp> {
if (!sliceAttr || sliceAttr.getRank() != 1)
return failure();
- std::optional<SmallVector<int64_t>> sgShape = sliceAttr.getSgDataAsInt();
+ std::optional<SmallVector<int64_t>> sgShape =
+ sliceAttr.getEffectiveSgDataAsInt();
if (!sgShape)
return failure();
diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt
index ab52f62..04c9163 100644
--- a/mlir/test/lib/Pass/CMakeLists.txt
+++ b/mlir/test/lib/Pass/CMakeLists.txt
@@ -4,6 +4,7 @@ add_mlir_library(MLIRTestPass
TestConvertToSPIRVPass.cpp
TestDynamicPipeline.cpp
TestPassManager.cpp
+ TestRemarksPass.cpp
TestSPIRVCPURunnerPipeline.cpp
TestVulkanRunnerPipeline.cpp
diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp
new file mode 100644
index 0000000..3b25686
--- /dev/null
+++ b/mlir/test/lib/Pass/TestRemarksPass.cpp
@@ -0,0 +1,74 @@
+//===------ TestRemarkPipeline.cpp --- dynamic pipeline test pass --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to test the dynamic pipeline feature.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Remarks.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Support/WalkResult.h"
+
+using namespace mlir;
+
+namespace {
+
+class TestRemarkPass : public PassWrapper<TestRemarkPass, OperationPass<>> {
+public:
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRemarkPass)
+
+ StringRef getArgument() const final { return "test-remark"; }
+ StringRef getDescription() const final {
+ return "Tests the remark pipeline feature";
+ }
+
+ TestRemarkPass() = default;
+
+ void runOnOperation() override {
+
+ getOperation()->walk([](Operation *op) {
+ if (isa<ModuleOp>(op))
+ return WalkResult::advance();
+ Location loc = op->getLoc();
+ mlir::remark::missed(loc, remark::RemarkOpts::name("test-remark")
+ .category("a-category-1-missed"))
+ << remark::add("This is a test missed remark")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
+
+ mlir::remark::passed(
+ loc,
+ remark::RemarkOpts::name("test-remark").category("category-1-passed"))
+ << remark::add("This is a test passed remark")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
+
+ mlir::remark::failed(
+ loc,
+ remark::RemarkOpts::name("test-remark").category("category-2-failed"))
+ << remark::add("This is a test failed remark")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
+
+ mlir::remark::analysis(loc, remark::RemarkOpts::name("test-remark")
+ .category("category-2-analysis"))
+ << remark::add("This is a test analysis remark");
+ return WalkResult::advance();
+ });
+ }
+};
+} // namespace
+
+namespace mlir {
+namespace test {
+void registerTestRemarkPass() { PassRegistration<TestRemarkPass>(); }
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
index f5a6fc5..e30c316 100644
--- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
+++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
@@ -13,6 +13,7 @@
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
@@ -73,6 +74,7 @@ void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
opt.kernelBarePtrCallConv = true;
opt.kernelIntersperseSizeCallConv = true;
passManager.addPass(createGpuToLLVMConversionPass(opt));
+ passManager.addPass(createReconcileUnrealizedCastsPass());
}
} // namespace
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 8a742a2..2fc595d 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -18,6 +18,7 @@ config.host_cxx = "@HOST_CXX@"
config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@"
config.host_cmake = "@CMAKE_COMMAND@"
config.host_cmake_generator = "@CMAKE_GENERATOR@"
+config.cmake_build_type = "@CMAKE_BUILD_TYPE@"
config.llvm_use_linker = "@LLVM_USE_LINKER@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.host_arch = "@HOST_ARCH@"
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 7b992b4..e4620c0 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -97,6 +97,7 @@ void registerTestDiagnosticsPass();
void registerTestDiagnosticsMetadataPass();
void registerTestDominancePass();
void registerTestDynamicPipelinePass();
+void registerTestRemarkPass();
void registerTestEmulateNarrowTypePass();
void registerTestFooAnalysisPass();
void registerTestComposeSubView();
@@ -243,6 +244,7 @@ void registerTestPasses() {
mlir::test::registerTestDiagnosticsMetadataPass();
mlir::test::registerTestDominancePass();
mlir::test::registerTestDynamicPipelinePass();
+ mlir::test::registerTestRemarkPass();
mlir::test::registerTestEmulateNarrowTypePass();
mlir::test::registerTestFooAnalysisPass();
mlir::test::registerTestComposeSubView();
diff --git a/mlir/unittests/IR/CMakeLists.txt b/mlir/unittests/IR/CMakeLists.txt
index 75cd2d6..dd3b110 100644
--- a/mlir/unittests/IR/CMakeLists.txt
+++ b/mlir/unittests/IR/CMakeLists.txt
@@ -14,7 +14,7 @@ add_mlir_unittest(MLIRIRTests
MemrefLayoutTest.cpp
OperationSupportTest.cpp
PatternMatchTest.cpp
- RemarkTest.cpp
+ RemarkTest.cpp
ShapedTypeTest.cpp
SymbolTableTest.cpp
TypeTest.cpp
diff --git a/mlir/unittests/IR/RemarkTest.cpp b/mlir/unittests/IR/RemarkTest.cpp
index 65e1e08..5bfca25 100644
--- a/mlir/unittests/IR/RemarkTest.cpp
+++ b/mlir/unittests/IR/RemarkTest.cpp
@@ -48,7 +48,8 @@ TEST(Remark, TestOutputOptimizationRemark) {
context.printStackTraceOnDiagnostic(true);
// Setup the remark engine
- mlir::remark::RemarkCategories cats{/*passed=*/categoryVectorizer,
+ mlir::remark::RemarkCategories cats{/*all=*/"",
+ /*passed=*/categoryVectorizer,
/*missed=*/categoryUnroll,
/*analysis=*/categoryRegister,
/*failed=*/categoryInliner};
@@ -197,7 +198,8 @@ TEST(Remark, TestOutputOptimizationRemarkDiagnostic) {
});
// Setup the remark engine
- mlir::remark::RemarkCategories cats{/*passed=*/categoryVectorizer,
+ mlir::remark::RemarkCategories cats{/*all=*/"",
+ /*passed=*/categoryVectorizer,
/*missed=*/categoryUnroll,
/*analysis=*/categoryRegister,
/*failed=*/categoryUnroll};
@@ -278,7 +280,8 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) {
Location loc = UnknownLoc::get(&context);
// Setup the remark engine
- mlir::remark::RemarkCategories cats{/*passed=*/categoryLoopunroll,
+ mlir::remark::RemarkCategories cats{/*all=*/"",
+ /*passed=*/categoryLoopunroll,
/*missed=*/std::nullopt,
/*analysis=*/std::nullopt,
/*failed=*/categoryLoopunroll};
diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE
index 20c0fd1..00cfea5 100644
--- a/utils/bazel/WORKSPACE
+++ b/utils/bazel/WORKSPACE
@@ -186,9 +186,9 @@ maybe(
http_archive,
name = "nanobind",
build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD",
- sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90",
- strip_prefix = "nanobind-2.9.2",
- url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz",
+ sha256 = "bb35deaed7efac5029ed1e33880a415638352f757d49207a8e6013fefb6c49a7",
+ strip_prefix = "nanobind-2.4.0",
+ url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.4.0.tar.gz",
)
load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
@@ -199,3 +199,17 @@ python_register_toolchains(
name = "python_3_12",
python_version = "3.12",
)
+
+maybe(
+ http_archive,
+ name = "rules_shell",
+ sha256 = "e6b87c89bd0b27039e3af2c5da01147452f240f75d505f5b6880874f31036307",
+ strip_prefix = "rules_shell-0.6.1",
+ url = "https://github.com/bazelbuild/rules_shell/releases/download/v0.6.1/rules_shell-v0.6.1.tar.gz",
+)
+
+load("@rules_shell//shell:repositories.bzl", "rules_shell_dependencies", "rules_shell_toolchains")
+
+rules_shell_dependencies()
+
+rules_shell_toolchains()
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index d9b1bb5..a955f7f 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -818,6 +818,7 @@ libc_support_library(
hdrs = ["src/__support/arg_list.h"],
deps = [
":__support_common",
+ ":string_memory_utils",
],
)
@@ -5003,6 +5004,7 @@ libc_support_library(
":__support_cpp_bit",
":__support_cpp_cstddef",
":__support_macros_attributes",
+ ":string_memory_utils",
],
)
@@ -6945,6 +6947,7 @@ libc_function(
deps = [
":__support_common",
":__support_macros_config",
+ ":string_memory_utils",
":types_size_t",
":types_wchar_t",
],
@@ -6968,6 +6971,7 @@ libc_function(
hdrs = ["src/wchar/wmempcpy.h"],
deps = [
":__support_common",
+ ":string_memory_utils",
":types_size_t",
":types_wchar_t",
],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
index 24baaf1..3183976 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
@@ -62,6 +62,7 @@ libc_test_library(
"//libc:__support_libc_errno",
"//libc:__support_macros_config",
"//libc:__support_macros_properties_architectures",
+ "//libc:__support_macros_properties_compiler",
"//libc:__support_macros_properties_types",
"//libc:__support_stringutil",
"//libc:__support_uint128",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index e6f10b0..8fe8258 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -6,6 +6,7 @@ load("@bazel_skylib//rules:common_settings.bzl", "string_flag")
load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load("@rules_python//python:defs.bzl", "py_binary")
+load("@rules_shell//shell:sh_binary.bzl", "sh_binary")
load("//mlir:tblgen.bzl", "gentbl_cc_library", "gentbl_filegroup", "td_library")
load(":binary_alias.bzl", "binary_alias")
load(":config.bzl", "llvm_config_defines")
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index e556d65..66cb795 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -9082,7 +9082,9 @@ cc_library(
":Parser",
":Pass",
":PluginsLib",
+ ":RemarkStreamer",
":Support",
+ "//llvm:Remarks",
"//llvm:Support",
],
)
@@ -10817,6 +10819,7 @@ cc_library(
":LinalgTransformOpsIncGen",
":LinalgTransforms",
":LinalgUtils",
+ ":ParallelCombiningOpInterface",
":SCFDialect",
":SCFTransforms",
":Support",
diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
index 944a911..016794d 100644
--- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
@@ -33,6 +33,13 @@ filegroup(
)
filegroup(
+ name = "ExecutionEnginePyIFiles",
+ srcs = [
+ "mlir/_mlir_libs/_mlirExecutionEngine.pyi",
+ ],
+)
+
+filegroup(
name = "IRPyFiles",
srcs = [
"mlir/ir.py",
@@ -47,6 +54,14 @@ filegroup(
)
filegroup(
+ name = "IRPyIFiles",
+ srcs = [
+ "mlir/_mlir_libs/_mlir/__init__.pyi",
+ "mlir/_mlir_libs/_mlir/ir.pyi",
+ ],
+)
+
+filegroup(
name = "MlirLibsPyFiles",
srcs = [
"mlir/_mlir_libs/__init__.py",
@@ -61,6 +76,13 @@ filegroup(
)
filegroup(
+ name = "PassManagerPyIFiles",
+ srcs = [
+ "mlir/_mlir_libs/_mlir/passmanager.pyi",
+ ],
+)
+
+filegroup(
name = "RewritePyFiles",
srcs = [
"mlir/rewrite.py",
@@ -638,6 +660,13 @@ gentbl_filegroup(
)
filegroup(
+ name = "PDLPyIFiles",
+ srcs = [
+ "mlir/_mlir_libs/_mlir/dialects/pdl.pyi",
+ ],
+)
+
+filegroup(
name = "PDLPyFiles",
srcs = [
"mlir/dialects/pdl.py",
@@ -728,6 +757,13 @@ filegroup(
##---------------------------------------------------------------------------##
filegroup(
+ name = "QuantPyIFiles",
+ srcs = [
+ "mlir/_mlir_libs/_mlir/dialects/quant.pyi",
+ ],
+)
+
+filegroup(
name = "QuantPyFiles",
srcs = [
"mlir/dialects/quant.py",
diff --git a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl
index 2213d220..c949352 100644
--- a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl
+++ b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl
@@ -506,6 +506,7 @@ def gentbl_sharded_ops(
includes: See gentbl_rule.includes
deps: See gentbl_rule.deps
strip_include_prefix: Attribute to pass through to cc_library.
+ **kwargs: Passed through to all generated rules.
"""
cc_lib_name = name + "__gentbl_cc_lib"
gentbl_cc_library(
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index e17cdb2..469fcee 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -535,6 +535,7 @@ cc_library(
"//mlir:MemRefToSPIRV",
"//mlir:MemRefTransforms",
"//mlir:Pass",
+ "//mlir:ReconcileUnrealizedCasts",
"//mlir:Rewrite",
"//mlir:SCFToSPIRV",
"//mlir:SPIRVConversion",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel
index daf639f..6a0af96 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel
@@ -10,6 +10,7 @@ package(default_visibility = ["//visibility:public"])
srcs = [src],
data = [
"Vector/td/unroll-elements.mlir",
+ "Vector/td/xfer-drop-unit-dims.mlir",
"Vector/vector-sink-transform.mlir",
"//llvm:llvm-symbolizer",
"//mlir:mlir-opt",
@@ -35,6 +36,7 @@ package(default_visibility = ["//visibility:public"])
"Transform/*-symbol-decl-and-schedule.mlir",
"Transform/include/**/*.mlir",
"Vector/td/unroll-elements.mlir",
+ "Vector/td/xfer-drop-unit-dims.mlir",
"Vector/vector-sink-transform.mlir",
],
)